import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter1d
import seaborn as sns
import matplotlib as mpl
from matplotlib.lines import Line2D
import yaml
from scipy import stats
import sys
sys.path.append('../utils/')
from ImagingUtilities import *
import warnings
warnings.filterwarnings('ignore')
from scipy.optimize import curve_fit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_log_error, r2_score
# Load the shared plotting configuration. Entries under "rcParams" are echoed
# and applied to matplotlib; every other top-level entry is only echoed.
with open("../data/resources/rcParams.yaml") as f:
    rcParamsDict = yaml.full_load(f)

for k, value in rcParamsDict["rcParams"].items():
    print("{} {}".format(k, value))
    plt.rcParams[k] = value

for k1 in set(rcParamsDict) - {"rcParams"}:
    print("{} {}".format(k1, rcParamsDict[k1]))
figure.dpi 80 savefig.dpi 500 figure.figsize [10, 10] axes.facecolor None figure.facecolor None dotSize 20
# Hex colour assigned to each cell line; passed as `palette=` to the seaborn
# calls below so a line keeps the same colour in every figure.
line_palette = {
'CTL01A': '#DBB807',
'CTL08A': '#0FB248',
'CTL04E': '#FF0054',
'CTL02A': '#7B00FF',
'H9': '#72190E',
'H1': '#994F88',
'CTL05A': '#1965B0',
'CTL07C': '#437DBF',
'CTL06F': '#CAE0AB',
'CTL09A': '#FFFF00',
'KTD8.2': '#E65518',
'UCSFi001-A': '#7BAFDE'}
# Load the main quantification table and the table with the additional time
# points; both use their first column as the index.
total_df = pd.read_csv('../../iPSC_imaging/quantifications/quantification.csv', index_col=0)
add_tp = pd.read_csv('../../iPSC_imaging/quantifications/quantification_addTP.csv', index_col=0)

# The de-duplicated frame used to be computed and then thrown away
# (`total_df[~total_df.duplicated()]` as a bare expression). Assign it so that
# exact duplicate rows are really removed before the extra rows are appended.
total_df = total_df[~total_df.duplicated()]
total_df = pd.concat([total_df, add_tp])

# Relabel CHD2WT and CHD8WT; every other name maps to itself.
donor_map_names = {i: j for i, j in zip(total_df['line'], total_df['line'])}
donor_map_names['CHD2WT'] = 'UCSFi001-A'
donor_map_names['CHD8WT'] = 'H9'
total_df['line'] = total_df['line'].map(donor_map_names)
total_df.shape
(2776, 16)
# Convert the pixel-based `total_area` into physical units.
# NOTE(review): 1.38 is hard-coded twice; the 'pixel_size' column is not reused here.
# NOTE(review): if 1.38 is a linear pixel size (µm per pixel), an area should
# scale with 1.38**2 — confirm what this factor represents.
total_df['pixel_size'] = 1.38
total_df['Area (microm2)'] = total_df.total_area * 1.38
# NOTE(review): µm² -> mm² is normally a division by 1e6, not 1000. Hard-coded
# axis limits further down are tuned to the current values — confirm before changing.
total_df['Area (mm2)'] = total_df['Area (microm2)'] / 1000
total_df['line'].unique()
array(['H1', 'CTL04E', 'CTL02A', 'CTL05A', 'H9', 'KTD8.2', 'CTL09A',
'CTL06F', 'CTL08A', 'CTL07C', 'UCSFi001-A', 'CTL01A'], dtype=object)
# Distinct acquisition labels, sorted as plain strings (so the order is
# lexicographic on the label text, not chronological).
all_tp = np.sort(total_df['time_point'].unique())
all_tp
array(['01_11_23_t18', '02_11_23_t18', '02_11_23_t9', '03_11_23_t18',
'03_11_23_t9', '04_11_23_t10', '04_11_23_t18', '05_11_23_t18',
'05_11_23_t9', '06_11_23_t18', '06_11_23_t9', '07_11_23_t18',
'07_11_23_t9', '08_11_23_t18', '08_11_23_t9', '09_11_23_t20',
'09_11_23_t9', '10_11_23_t18', '10_11_23_t9', '11_11_23_t18',
'11_11_23_t9', '12_11_23_t13', '12_11_23_t18', '13_11_23_t18',
'13_11_23_t9', '14_11_23_t17', '14_11_23_t9', '15_11_23_t17',
'15_11_23_t9', '16_11_23_t10', '16_11_23_t18', '17_11_23_t17',
'17_11_23_t9', '18_11_23_t10', '18_11_23_t18', '19_11_23_t10',
'19_11_23_t18', '20_11_23_t18', '20_11_23_t9', '21_11_23_t17',
'21_11_23_t9', '22_11_23_t10', '31_10_23_t18', '31_10_23_t9'],
dtype=object)
Here I'm adding a few quantifications:
1. norm_factor: the normalization factor, corresponding to the mean percentage area across all lines at each time point post split (called split_time)
2. perc_area_norm: the normalized percentage area, corresponding to the percentage area divided by the normalization factor (1.)
3. mean_area_tp: the mean area of each line at each time point post split (called split_time)
4. area_error: the percentage "error" of the total area computed with respect to the mean of that line at that time point
5. std: the standard deviation of each area with respect to (3.)
6. cv: the coefficient of variation, corresponding to the ratio between the standard deviation and the mean

total_df['line_split'] = total_df['line'].astype('str') + '_' + total_df['split_time'].astype('str')
# Mean percentage area over all lines at each time point post split.
# The column is selected explicitly: the first positional parameter of
# GroupBy.mean is `numeric_only`, so `.mean('perc_area')` never selected a
# column — it only worked because a non-empty string is truthy.
mean_df_time_point = total_df.groupby('split_time')[['perc_area']].mean()
mean_df_time_point_dict = mean_df_time_point['perc_area'].to_dict()
mean_df_time_point_dict

# Mean area of each line at each time point post split, keyed by the same
# "<line>_<split_time>" label that is stored in total_df['line_split'].
area_df_time_point = total_df.groupby(['line', 'split_time'])[['Area (microm2)']].mean().reset_index()
area_df_time_point['line_split'] = (
    area_df_time_point['line'].astype('str') + '_' + area_df_time_point['split_time'].astype('str')
)
area_df_time_point = dict(zip(area_df_time_point['line_split'], area_df_time_point['Area (microm2)']))
area_df_time_point

total_df['norm_factor'] = total_df.split_time.map(mean_df_time_point_dict)
total_df['perc_area_norm'] = total_df['perc_area'] / total_df['norm_factor']
total_df['mean_area_tp'] = total_df.line_split.map(area_df_time_point)
# NOTE(review): the deviation from the group mean is divided by the observed
# area, not by the mean — confirm this is the intended definition.
total_df['area_error'] = (total_df['mean_area_tp'] - total_df['Area (microm2)']) / total_df['Area (microm2)']
# NOTE(review): this is each row's absolute deviation from its group mean
# divided by sqrt(len(total_df)), not a per-group standard deviation — confirm.
total_df['std'] = np.sqrt((total_df['Area (microm2)'] - total_df['mean_area_tp'])**2 / len(total_df))
total_df['cv'] = total_df['std'] / total_df['mean_area_tp']
total_df['line_n_split'] = total_df['line'] + '_' + total_df['n_split'].astype('str')
# Kernel density estimate of the per-row 'cv' values.
sns.kdeplot(total_df['cv'])
<Axes: xlabel='cv', ylabel='Density'>
# Kernel density estimate of 'area_error' with a vertical reference line at 15
# (presumably a candidate cut-off — the filter applied later uses 5).
ax = sns.kdeplot(total_df['area_error'])
ax.axvline(15)
<matplotlib.lines.Line2D at 0x7f22cbcf05e0>
# Same density, restricted to the x range [-1, 30], with a reference line at 10.
ax = sns.kdeplot(total_df['area_error'])
ax.set_xlim(-1, 30)
ax.axvline(10)
<matplotlib.lines.Line2D at 0x7f22c7bbb970>
# Percentage area against time since split for every row, coloured by line.
fig, ax = plt.subplots(figsize = (20, 10))
sns.scatterplot(data = total_df, y = 'perc_area', x = 'split_time', ax = ax, hue = 'line', palette=line_palette)
<Axes: xlabel='split_time', ylabel='perc_area'>
# Drop rows that combine an early time point (split_time < 25) with a
# perc_area above 10.
total_df = total_df[~((total_df['split_time'] < 25) & (total_df['perc_area'] > 10))]
# NOTE(review): area_error can be negative and log10 of a negative value is
# NaN, so those rows are not represented in this density — confirm acceptable.
ax = sns.kdeplot(np.log10(total_df['area_error']))
# Keep only rows whose area_error is below 5.
total_df = total_df[total_df['area_error'] < 5]
total_df.shape
(2170, 25)
# For each line, take the index label of the row holding the largest n_split.
# NOTE(review): n_split is compared against strings elsewhere ('day', '1'); if
# it is stored as text, "largest" is not a numeric maximum — confirm the dtype.
idx_max = total_df.groupby('line')['n_split'].idxmax()
# Drop the rows carrying those index labels (drop works by label, so every row
# sharing one of these labels is removed).
filtered_df = total_df.drop(idx_max)
filtered_df
| total_area | perc_area | mean_area_per_colony | n_colonies | time_point | confluency/generation | hour | month | day | line | ... | split_time | pixel_size | Area (microm2) | Area (mm2) | line_split | mean_area_tp | area_error | std | cv | line_n_split | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CTL04E_3_14_11_1.czi | 2532723.0 | 26.606326 | 1.266362e+06 | 2.0 | 14_11_23_t9 | generation | 9 | 11 | 14 | CTL04E | ... | 159.0 | 1.38 | 3495157.74 | 3495.15774 | CTL04E_159.0 | 3.497834e+06 | 0.000766 | 50.791607 | 0.000015 | CTL04E_3 |
| CTL02A_3_14_11_3.czi | 1051145.0 | 11.042308 | 1.313931e+05 | 8.0 | 14_11_23_t9 | generation | 9 | 11 | 14 | CTL02A | ... | 44.0 | 1.38 | 1450580.10 | 1450.58010 | CTL02A_44.0 | 1.548576e+06 | 0.067556 | 1859.928320 | 0.001201 | CTL02A_3 |
| CHD8WT_3_14_11_1.czi | 1655866.0 | 17.394919 | 1.655866e+05 | 10.0 | 14_11_23_t9 | generation | 9 | 11 | 14 | H9 | ... | 44.0 | 1.38 | 2285095.08 | 2285.09508 | H9_44.0 | 9.615470e+05 | -0.579209 | 25120.598728 | 0.026125 | H9_3 |
| KTD8.2_3_14_11_4.czi | 862216.0 | 9.057603 | 2.874053e+05 | 3.0 | 14_11_23_t9 | generation | 9 | 11 | 14 | KTD8.2 | ... | 72.0 | 1.38 | 1189858.08 | 1189.85808 | KTD8.2_72.0 | 7.105711e+05 | -0.402810 | 9096.742330 | 0.012802 | KTD8.2_3 |
| CTL09A_4_14_11_5.czi | 165602.0 | 1.739654 | 2.760033e+04 | 6.0 | 14_11_23_t9 | generation | 9 | 11 | 14 | CTL09A | ... | 24.0 | 1.38 | 228530.76 | 228.53076 | CTL09A_24.0 | 2.885001e+05 | 0.262413 | 1138.202417 | 0.003945 | CTL09A_4 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| CTL08A_5_22_11_2.czi | 946317.0 | 9.941087 | 6.759407e+04 | 14.0 | 22_11_23_t10 | generation | 10 | 11 | 22 | CTL08A | ... | 17.0 | 1.38 | 1305917.46 | 1305.91746 | CTL08A_17.0 | 1.797598e+06 | 0.376502 | 9331.962580 | 0.005191 | CTL08A_5 |
| CTL08A_5_22_11_3.czi | 638843.0 | 6.711064 | 1.064738e+05 | 6.0 | 22_11_23_t10 | generation | 10 | 11 | 22 | CTL08A | ... | 17.0 | 1.38 | 881603.34 | 881.60334 | CTL08A_17.0 | 1.797598e+06 | 1.039010 | 17385.334447 | 0.009671 | CTL08A_5 |
| CTL01A_5_22_11_4.czi | 834622.0 | 8.767728 | 5.961586e+04 | 14.0 | 22_11_23_t10 | generation | 10 | 11 | 22 | CTL01A | ... | 17.0 | 1.38 | 1151778.36 | 1151.77836 | CTL01A_17.0 | 1.359752e+06 | 0.180567 | 3947.276901 | 0.002903 | CTL01A_5 |
| CTL08A_5_22_11_4.czi | 857634.0 | 9.009469 | 1.072042e+05 | 8.0 | 22_11_23_t10 | generation | 10 | 11 | 22 | CTL08A | ... | 17.0 | 1.38 | 1183534.92 | 1183.53492 | CTL08A_17.0 | 1.797598e+06 | 0.518838 | 11654.751425 | 0.006484 | CTL08A_5 |
| CHD8WT_5_22_11_1.czi | 245495.0 | 2.578932 | 1.227475e+05 | 2.0 | 22_11_23_t10 | generation | 10 | 11 | 22 | H9 | ... | 17.0 | 1.38 | 338783.10 | 338.78310 | H9_17.0 | 1.725624e+06 | 4.093595 | 26321.881011 | 0.015254 | H9_5 |
2151 rows × 25 columns
# log10 of the area; the small offset keeps a zero area from producing -inf.
total_df['logArea'] = np.log10(total_df['Area (microm2)'] + 0.000001)
Here we fitted a polynomial regression function of order 3 (exploratory to look at what type of shapes we expect from the curves):
# Alphabetical list of lines, reused as the facet order in later cells.
order = sorted(total_df.line.unique().tolist())

sns.set_theme(style="ticks")

# One facet per cell line, four facets per row, each in the line's own colour.
grid = sns.FacetGrid(
    total_df.sort_values(by='split_time'),
    col="line",
    hue='line',
    palette=line_palette,
    col_wrap=4,
    height=5,
    col_order=order,
)

# Scatter of the area against time with a third-order polynomial fit on top.
grid.map(sns.regplot, "split_time", "Area (mm2)", order=3)
grid.set_axis_labels("Time point post split", "Area (mm2)")

# Leave a little horizontal padding between facets.
grid.fig.tight_layout(w_pad=1)
Without fitting any regression (the line goes through the mean and the highlighted band around it is the standard deviation):
sns.set_theme(style="ticks")
# One facet per cell line, each drawn in that line's colour.
grid = sns.FacetGrid(total_df.sort_values(by='split_time'), col="line", hue='line', palette=line_palette,
                     col_wrap=4, height=5, col_order=order)
# Line through the mean area at every time point. errorbar='sd' makes the
# shaded band the standard deviation; seaborn's default band is a 95%
# confidence interval of the mean.
grid.map(sns.lineplot, "split_time", "Area (mm2)", markers=True, errorbar='sd')
# The plotted column is 'Area (mm2)', so the axis is labelled with that
# quantity (it was previously labelled as pixels).
grid.set_axis_labels("Time point post split", "Area (mm2)")
# Leave a little horizontal padding between facets.
grid.fig.tight_layout(w_pad=1)
We take the area and average over all the FOVs at each time point, for each line. The plot is composed of:
# Plot elements, one panel per line:
#   - the individual FOV areas (scatter),
#   - the mean area per time point (dashed line),
#   - that mean smoothed with gaussian_filter1d from scipy.ndimage (solid line),
#   - a band of +/- half a standard deviation around the mean, smoothed the same way.
fig, ax = plt.subplots(4, 3, figsize=(30, 21), gridspec_kw={'hspace': 0.7})
ax = ax.flatten()
for ax_index, line in enumerate(total_df.line.unique()):
    sub = total_df[(total_df.line == line) & (total_df.n_split != 'day')].sort_values(by='datetime')
    per_time_point = sub.groupby('split_time')['Area (mm2)']
    mean_st = per_time_point.mean()
    std_st = per_time_point.std()
    y_pos = mean_st.index
    # The points must be in the same unit as the mean line and the y-limits
    # (mm2). They used to be taken from 'Area (microm2)', which put them far
    # above the visible range.
    ydata = sub['Area (mm2)'].values
    xdata = sub.split_time.values.astype('int')
    farray = mean_st.values
    # Smoothing
    farray_smooth = gaussian_filter1d(farray, sigma=3)
    # A time point with a single observation has an undefined (NaN) standard
    # deviation, and gaussian_filter1d would spread that NaN over the
    # neighbouring samples; such points get a zero-width band instead.
    half_std = (std_st / 2).fillna(0).values
    upper_err = gaussian_filter1d(farray + half_std, sigma=3)
    lower_err = gaussian_filter1d(farray - half_std, sigma=3)
    panel = ax[ax_index]
    panel.scatter(xdata, ydata)
    panel.plot(y_pos, farray, '--', linewidth=0.7, color='k', alpha=0.45)
    panel.plot(y_pos, farray_smooth, color='#2374AB')
    panel.fill_between(y_pos, upper_err, lower_err, color='crimson', alpha=0.2)
    # Leave 25% headroom above the largest mean.
    panel.set_ylim(0, np.max(farray) * 1.25)
    # Rotate x-axis labels for better readability
    panel.tick_params(axis='x', rotation=90)
    panel.set_title(f'Line {line}')
    panel.set_ylabel('Area (mm2)')
    panel.set_xlabel('Time point after split')
plt.tight_layout()  # Adjust layout to prevent overlapping
plt.show()
Here we plot the curves of each line without any regression, coloured by passage number (one curve per split):
sns.set_theme(style="ticks")

# One colour per passage number.
split_palette = {'1': '#264653', '2': '#2a9d8f', '3': '#8ab17d', '4': '#e9c46a', '5': '#f4a261', '6': '#e76f51'}

# Every "<line>_<passage>" label takes the colour of its passage number, i.e.
# of the text after the last underscore.
line_split_palette = {
    label: split_palette[label.split('_')[-1]]
    for label in total_df.line_n_split.unique()
}

# Legend entries are built by hand: one coloured line per passage number.
custom_handles = []
for color in split_palette.values():
    custom_handles.append(Line2D([0], [0], color=color, lw=2))

grid = sns.FacetGrid(
    total_df.sort_values(by='split_time'),
    col="line",
    hue='line_n_split',
    palette=line_split_palette,
    col_wrap=4,
    height=5,
    col_order=order,
)
grid.map(sns.lineplot, "split_time", "Area (mm2)", markers=True)

# The automatic legend is added and immediately removed, then replaced by the
# hand-built per-passage legend.
grid.add_legend()
if grid._legend:
    grid._legend.remove()
legend = grid.fig.legend(custom_handles, split_palette.keys(), ncol=2, frameon=False,
                         bbox_to_anchor=(1.2, 1), fontsize=25)
legend.set_title('Passage number', prop={'size': 30})

# Larger facet titles and tick labels.
for ax in grid.axes.flat:
    ax.set_title(ax.get_title(), fontsize=35)
    ax.tick_params(axis='x', labelsize=20)
    ax.tick_params(axis='y', labelsize=20)

grid.set_axis_labels("Hours after splitting", "Total area (mm2)", fontsize=25)
grid.fig.tight_layout(w_pad=1)
grid.fig.savefig('./figures/raw_GC_iPSC_dividedSplit.svg', dpi=300, bbox_inches='tight')
I need to filter out the combinations of "line" - "number of split" that do not have enough data points to fit an order 3 polynomial regression:
sns.set_theme(style="ticks")
# Keep only the line/passage combinations observed at more than 5 distinct
# time points. Distinct time points are counted (not rows, which include
# several FOVs per time point), matching the `> 5` unique-time-point rule used
# by the per-split cells below.
n_time_points = total_df.groupby('line_n_split')['split_time'].nunique()
boolean_sel = n_time_points > 5
boolean_sel = boolean_sel[boolean_sel]
filtered_total = total_df[total_df.line_n_split.isin(boolean_sel.index)]
# One facet per line/passage combination. The facet order has to list values
# of the faceting column ('line_n_split'); it used to be `order`, which holds
# plain line names, so no row matched any facet and every panel stayed empty.
grid = sns.FacetGrid(filtered_total.sort_values(by='split_time'), col="line_n_split", hue='line',
                     palette=line_palette, col_wrap=4, height=5,
                     col_order=sorted(boolean_sel.index))
# Scatter of the area against time with a third-order polynomial fit on top.
grid.map(sns.regplot, "split_time", "Area (microm2)", order=3)
# Leave a little horizontal padding between facets.
grid.fig.tight_layout(w_pad=1)
We take the area and average over all the FOVs at each time point, for each line at each split. The plot is composed of:
# Plot elements, one panel per line/passage:
#   - the individual FOV areas (scatter),
#   - the 'mean' column returned by preprocess (dashed line),
#   - that column smoothed with gaussian_filter1d from scipy.ndimage (solid line),
#   - a smoothed band of +/- half of the 'stds' column around it.
fig, ax = plt.subplots(9, 5, figsize=(5*10, 7*9), gridspec_kw={'hspace': 0.7})
ax = ax.flatten()
ax_index = 0
summary_dfs_dict = {}
for l in order:
    sub = total_df[total_df.line == l].sort_values(by='datetime')
    sub = sub[sub.n_split != 'day']
    for split in sorted(sub.n_split.unique().tolist()):
        subsub = sub[sub.n_split == split]
        # Passages with 5 or fewer distinct time points are reported and skipped.
        if len(subsub.split_time.unique()) <= 5:
            print(f'Skipped split {subsub.n_split.values[0]} of line {subsub.line.values[0]}')
            continue

        ydata = subsub['Area (microm2)'].values
        xdata = subsub.split_time.values.astype('int')

        # preprocess comes from ImagingUtilities (not shown here); it is asked
        # for its 'mean' output and the result is kept for later use.
        summary_df = preprocess(subsub, original_v='Area (microm2)', final_output='mean')
        summary_dfs_dict[f'{l}_split_{split}'] = summary_df
        farray = np.array(summary_df['mean'])
        y_pos = summary_df.split_time.values

        # Smoothed centre line and smoothed band edges.
        half_spread = summary_df['stds'] / 2
        farray_smooth = gaussian_filter1d(farray, sigma=3)
        upper_err = gaussian_filter1d(farray + half_spread, sigma=3)
        lower_err = gaussian_filter1d(farray - half_spread, sigma=3)

        panel = ax[ax_index]
        panel.scatter(xdata, ydata)
        panel.plot(y_pos, farray, '--', linewidth=0.7, color='k', alpha=0.45)
        panel.plot(y_pos, farray_smooth, color='#2374AB')
        panel.fill_between(y_pos, upper_err, lower_err, color='crimson', alpha=0.2)

        # Upper limit: the largest value plus 25% of it.
        panel.set_ylim(0, np.max(farray) + (np.max(farray * 25) / 100))
        panel.xaxis.set_tick_params(rotation=90)
        panel.set_title(f'{l}_split_{split}')
        panel.set_ylabel('Total area')
        panel.set_xlabel('Time point')
        ax_index += 1
plt.show()
Skipped split 3 of line CTL01A Skipped split 1 of line CTL04E Skipped split 1 of line CTL09A Skipped split 1 of line UCSFi001-A
We take the logarithm of the area and sum over all the FOVs at each time point, for each line at each split. The plot is composed of:
# Plot elements, one panel per line/passage:
#   - the 'area_sum' column returned by preprocess for 'logArea' (dashed line),
#   - that column smoothed with gaussian_filter1d from scipy.ndimage (solid line),
#   - a smoothed band of +/- half of the 'stds' column around it.
fig, ax = plt.subplots(10, 5, figsize=(5*10, 7*9), gridspec_kw={'hspace': 0.7})
ax = ax.flatten()
ax_index = 0
for l in order:
    sub = total_df[total_df.line == l].sort_values(by='datetime')
    sub = sub[sub.n_split != 'day']
    for split in sorted(sub.n_split.unique().tolist()):
        subsub = sub[sub.n_split == split]

        # preprocess comes from ImagingUtilities (not shown here); it is asked
        # for its 'area_sum' output.
        summary_df = preprocess(subsub, original_v='logArea', final_output='area_sum')
        farray = np.array(summary_df['area_sum'])
        y_pos = summary_df.datetime.values

        # Smoothed centre line and smoothed band edges.
        half_spread = summary_df['stds'] / 2
        farray_smooth = gaussian_filter1d(farray, sigma=1)
        upper_err = gaussian_filter1d(farray + half_spread, sigma=1)
        lower_err = gaussian_filter1d(farray - half_spread, sigma=1)

        panel = ax[ax_index]
        panel.plot(y_pos, farray, '--', linewidth=0.7, color='k', alpha=0.45)
        panel.plot(y_pos, farray_smooth, color='#2374AB')
        panel.fill_between(y_pos, upper_err, lower_err, color='crimson', alpha=0.2)

        # Upper limit: the largest value plus 25% of it.
        panel.set_ylim(0, np.max(farray) + (np.max(farray * 25) / 100))
        panel.xaxis.set_tick_params(rotation=90)
        panel.set_title(f'{l}_split_{split}')
        panel.set_ylabel('Log total area')
        panel.set_xlabel('Time point')
        ax_index += 1
plt.show()
lines = total_df.line.unique()
We use the total area and then sum over all the FOVs at each time point, for each line at each split. Then we smooth it with gaussian_filter1d and compute the difference between consecutive time points, corresponding to the first discrete derivative.
# Per line/passage: smooth the 'mean' column returned by preprocess and take
# its first discrete derivative with respect to split_time. Every resulting
# table is stored in discrete_deriv_curves under "<line>_split_<passage>".
discrete_deriv_curves = {}
fig, ax = plt.subplots(10, 5, figsize=(5*10, 7*9), gridspec_kw={'hspace': 0.7})
ax = ax.flatten()
ax_index = 0
for l in total_df.line.unique():
    sub = total_df[total_df.line == l].sort_values(by='datetime')
    sub = sub[sub.n_split != 'day']
    for split in sub.n_split.unique():
        subsub = sub[sub.n_split == split]

        # preprocess comes from ImagingUtilities (not shown here); it is asked
        # for its 'mean' output.
        summary_df = preprocess(subsub, original_v='Area (microm2)', final_output='mean')
        summary_df = summary_df.sort_values('split_time')
        summary_df['smoothed'] = gaussian_filter1d(summary_df['mean'], 3)
        # Change of the smoothed curve per unit of split_time; the first row
        # has nothing to difference against and is NaN.
        delta_area = summary_df['smoothed'].diff()
        delta_time = summary_df['split_time'].diff()
        summary_df['derivative'] = delta_area / delta_time
        discrete_deriv_curves[f'{l}_split_{split}'] = summary_df

        farray = np.array(summary_df['derivative'])
        y_pos = summary_df.split_time.values

        panel = ax[ax_index]
        panel.errorbar(y_pos, farray, marker='o')
        panel.xaxis.set_tick_params(rotation=90)
        panel.set_title(f'{l}_split_{split}')
        panel.set_ylabel('Discrete derivative')
        panel.set_xlabel('Time point')
        ax_index += 1
plt.show()
We collected the results for each line and each split, and we can use them to converge to a single result for each line, using the splits as replicates.
# Stack the per-line/per-passage derivative tables. The dictionary key
# ("<line>_split_<passage>") ends up in the 'level_0' column, from which the
# line name (text before the first underscore) and the passage (text after
# the last underscore) are recovered.
deriv_df = pd.concat(discrete_deriv_curves.values(), keys = discrete_deriv_curves.keys()).reset_index()
deriv_df['line'] = deriv_df['level_0'].apply(lambda x: x.split('_')[0])
deriv_df['split'] = deriv_df['level_0'].apply(lambda x: x.split('_')[-1])

fig, ax = plt.subplots(4, 3, figsize=(5*4, 7*3), gridspec_kw={'hspace': 0.7})
ax = ax.flatten()
ax_index = 0
for line in deriv_df.line.unique():
    sub = deriv_df[deriv_df.line == line]
    # The first derivative of every passage is NaN (nothing to difference
    # against), and gaussian_filter1d spreads a NaN over all samples inside
    # its kernel, so those rows are dropped before smoothing. `.copy()` makes
    # the column assignment below act on an independent frame.
    sub = sub.dropna(subset=['derivative']).sort_values('split_time').copy()
    if not sub.empty:
        # NOTE(review): the smoothing runs over the rows of all passages
        # pooled and ordered by time, not within each passage — confirm intended.
        sub['smoothed'] = gaussian_filter1d(sub['derivative'], sigma=3)
        sns.lineplot(data=sub, y='smoothed', x='split_time', hue='split', ax=ax[ax_index],
                     errorbar='sd', markers=True, palette=split_palette)
    ax[ax_index].xaxis.set_tick_params(rotation=90)
    ax[ax_index].set_title(f'{line}')
    ax[ax_index].set_ylabel('Discrete derivative')
    ax[ax_index].set_xlabel('Time point')
    ax_index += 1
# Same derivative curves, one panel per line, with the passages pooled:
# seaborn aggregates the values sharing a split_time and draws standard
# deviation error bars.
fig, ax = plt.subplots(4, 3, figsize=(5*4, 7*3))
ax = ax.flatten()
ax_index = 0
for line in deriv_df.line.unique():
    sub = deriv_df[deriv_df.line == line]
    # The first derivative of every passage is NaN, and gaussian_filter1d
    # spreads a NaN over all samples inside its kernel, so those rows are
    # dropped before smoothing. `.copy()` makes the column assignment below
    # act on an independent frame.
    sub = sub.dropna(subset=['derivative']).sort_values('split_time').copy()
    if not sub.empty:
        sub['smoothed'] = gaussian_filter1d(sub['derivative'], sigma=3)
        sns.lineplot(data=sub, y='smoothed', x='split_time', ax=ax[ax_index],
                     errorbar='sd', markers=True, err_style='bars')
    ax[ax_index].xaxis.set_tick_params(rotation=90)
    ax[ax_index].set_title(f'{line}')
    ax[ax_index].set_ylabel('Discrete derivative')
    ax[ax_index].set_xlabel('Time point')
    ax_index += 1
plt.tight_layout()
We use the total area and then average over all the FOVs at each time point, for each line at each split. Then we smooth it with gaussian_filter1d and compute the cumulative sum over the discrete differential of the growth with the cumsum() function.
# One panel per line/passage: running total of the mean area per time point.
fig, ax = plt.subplots(9, 5, figsize=(5*10, 7*9))
ax = ax.flatten()
ax_index = 0
order_line_n = sorted(total_df.line_n_split.unique().tolist())
for line_n in order_line_n:
    sub = total_df[total_df.line_n_split == line_n].sort_values('split_time')
    # Combinations with 5 or fewer distinct time points are not drawn.
    if len(sub.split_time.unique()) <= 5:
        continue

    mean_per_time_point = sub.groupby('split_time')['Area (mm2)'].mean()
    y = mean_per_time_point.cumsum().values
    x = sub.split_time.unique()

    panel = ax[ax_index]
    sns.lineplot(y=y, x=x, ax=panel, errorbar='sd', markers=True, err_style='bars')
    panel.xaxis.set_tick_params(rotation=90)
    panel.set_title(f'{line_n}')
    panel.set_ylabel('Cumulative of mean total area')
    panel.set_xlabel('Time point')
    ax_index += 1
plt.tight_layout()
# Draw the cumulative smoothed area of every line/passage on one axis, in the
# colour of its line, and keep each drawn curve in `all_lines` under
# "<line>_<passage>".
# NOTE(review): indentation was lost in the export of this notebook; the
# legend bookkeeping (labels/handles/lc) is assumed to run once per line,
# after its passages were drawn — confirm against the original notebook.
sns.set(style="white", palette="Paired", color_codes=True)
fig, ax = plt.subplots(figsize=(7,5))
labels = []
lc = []
handles = []
all_lines = {}
# NOTE(review): not used in the part of the file visible here.
total_df_no_first = total_df[total_df.n_split != '1'].copy()
for l in total_df.line.unique():
    color = line_palette[l]
    sub = total_df[total_df.line == l]
    sub = sub.sort_values(by = 'datetime')
    sub = sub[sub.n_split != 'day']
    for split in sub.n_split.unique():
        subsub = sub[sub.n_split == split]
        # preprocess comes from ImagingUtilities (not shown here);
        # it is asked for its 'mean' output.
        summary_df = preprocess(subsub, original_v='Area (mm2)', final_output='mean')
        summary_df = summary_df.sort_values('split_time')
        # Light smoothing (sigma = 1), then the running total of the smoothed values.
        summary_df['smoothed'] = gaussian_filter1d(summary_df['mean'], 1)
        summary_df['cumulative'] = summary_df['smoothed'].cumsum()
        farray = np.array(summary_df['cumulative'])
        y_pos = summary_df.split_time.values
        line, = ax.plot(y_pos, farray, color = color, marker = '.')
        ax.xaxis.set_tick_params(rotation=90)
        all_lines[f'{l}_{split}'] = line
    labels.append(l)
    handles.append(line)
    lc.append(color)
plt.legend(handles, labels, bbox_to_anchor = (1,1))
<matplotlib.legend.Legend at 0x7f22c4fd7910>
lines = total_df.line.unique()
# highlight_growth_curves is not defined in this file (presumably it comes
# from the ImagingUtilities star import); it receives the curves collected in
# `all_lines` together with the list of line names.
highlight_growth_curves(all_lines, xlabel = 'Hours from split', ylabel = 'Cumulative growth', lines = lines, fontsize = 20)
#plt.savefig('growth_curve_per_line.pdf', dpi = 300)
# Scratch check: length of the 0..199 time grid used to draw fitted curves below.
len(np.float64(list(range(0,200))))
200
fig, ax = plt.subplots(5, 9, figsize=(5*12, 8*5))
ax = ax.flatten()
ax_index = 0
fitted_param = {}


def exp_model(t, a, b, c):
    """Exponential curve ``a * exp(b * t) + c - a``.

    At ``t = 0`` the curve equals ``c``; ``b`` is the rate inside the
    exponential and ``a`` scales the exponential term.
    """
    return a * np.exp(b * t) + c - a


# Time grid (0..199) used only to draw the fitted curve.
t_viz = np.arange(200, dtype=np.float64)

for line_n in order_line_n:
    # Every line/passage keeps an entry; it stays empty when nothing is fitted.
    fitted_param[line_n] = {}
    sub = total_df[total_df.line_n_split == line_n]
    mean_area = sub.groupby('split_time')['Area (mm2)'].mean()
    # Only fit combinations with more than 5 distinct time points.
    if len(mean_area) <= 5:
        continue
    # Cumulative sum of the mean area per time point. x is read from the same
    # grouped index, so x and y always have the same length and order.
    y = mean_area.cumsum().values
    x = mean_area.index.values
    # Smallest strictly positive area at the first time point: used both as
    # the start value and as the lower bound of c.
    y_min_0 = sub[(sub.split_time == sub.split_time.min()) & (sub['Area (mm2)'] > 0)]['Area (mm2)'].min()
    hue = [line_n.split('_')[0]] * len(y)
    try:
        popt, pcov = curve_fit(exp_model, x, y, p0=(y.min(), 0.1, y_min_0),
                               bounds=((-np.inf, 0, y_min_0), (np.inf, np.inf, np.inf)))
    except (RuntimeError, ValueError) as e:
        # RuntimeError: the optimiser did not converge. ValueError: curve_fit
        # rejected the input (e.g. NaN data or start value). Either way this
        # panel is skipped instead of aborting the whole figure.
        print(f"Fitting failed for line_n {line_n}: {e}")
        continue

    a, b, c = popt
    y_fitted = exp_model(x, a, b, c)
    y_fitted_viz = exp_model(t_viz, a, b, c)
    try:
        msle = mean_squared_log_error(y, y_fitted)
    except ValueError:
        # sklearn refuses negative values, which the fitted curve can take.
        msle = np.nan

    fitted_param[line_n]['a'] = a
    fitted_param[line_n]['rate'] = b
    fitted_param[line_n]['min(y)'] = y.min()
    fitted_param[line_n]['c'] = c
    fitted_param[line_n]['mean_cum'] = y
    fitted_param[line_n]['split_time'] = x
    fitted_param[line_n]['y_fitted'] = y_fitted
    fitted_param[line_n]['MSLE'] = msle
    fitted_param[line_n]['r2'] = r2_score(y, y_fitted)

    panel = ax[ax_index]
    sns.lineplot(y=y_fitted_viz, x=t_viz, ax=panel)
    sns.scatterplot(y=y, x=x, ax=panel, markers=True, hue=hue, palette=line_palette, s=150, legend=None)
    panel.set_title(f'{line_n}', fontsize=40)
    panel.set_ylabel('Cumulative area (mm2)', fontsize=35)
    panel.set_xlabel('Time point', fontsize=35)
    panel.set_xlim(-10, 220)
    panel.set_ylim(-10, 85000)
    # Size and rotate the tick labels through tick_params. Re-assigning the
    # current labels with set_xticklabels/set_yticklabels freezes their text
    # while the tick positions can still move when the layout changes.
    panel.tick_params(axis='x', rotation=90, labelsize=30)
    panel.tick_params(axis='y', labelsize=30)
    ax_index += 1

plt.tight_layout()
plt.savefig('./figures/Fitted_cum_area_per_split_new_viz.svg', dpi=300, bbox_inches='tight')
Fitting failed for line_n CTL01A_5: Optimal parameters not found: The maximum number of function evaluations is exceeded.
fig, ax = plt.subplots(5, 9, figsize=(5*12, 8*5))
ax = ax.flatten()
ax_index = 0
fitted_param = {}


def exp_model(t, a, b, c):
    """Exponential curve ``a * exp(b * t) + c - a``.

    At ``t = 0`` the curve equals ``c``; ``b`` is the rate inside the
    exponential and ``a`` scales the exponential term.
    """
    return a * np.exp(b * t) + c - a


for line_n in order_line_n:
    # Every line/passage keeps an entry; it stays empty when nothing is fitted.
    fitted_param[line_n] = {}
    sub = total_df[total_df.line_n_split == line_n]
    mean_area = sub.groupby('split_time')['Area (mm2)'].mean()
    # Only fit combinations with more than 5 distinct time points.
    if len(mean_area) <= 5:
        continue
    # Cumulative sum of the mean area per time point. x is read from the same
    # grouped index, so x and y always have the same length and order.
    y = mean_area.cumsum().values
    x = mean_area.index.values
    # Smallest strictly positive area at the first time point: used both as
    # the start value and as the lower bound of c.
    y_min_0 = sub[(sub.split_time == sub.split_time.min()) & (sub['Area (mm2)'] > 0)]['Area (mm2)'].min()
    hue = [line_n.split('_')[0]] * len(y)
    try:
        popt, pcov = curve_fit(exp_model, x, y, p0=(y.min(), 0.1, y_min_0),
                               bounds=((-np.inf, 0, y_min_0), (np.inf, np.inf, np.inf)))
    except (RuntimeError, ValueError) as e:
        # RuntimeError: the optimiser did not converge. ValueError: curve_fit
        # rejected the input (e.g. NaN data or start value). Either way this
        # panel is skipped instead of aborting the whole figure.
        print(f"Fitting failed for line_n {line_n}: {e}")
        continue

    a, b, c = popt
    y_fitted = exp_model(x, a, b, c)

    # The MSLE is deliberately not stored in this variant of the figure.
    fitted_param[line_n]['a'] = a
    fitted_param[line_n]['rate'] = b
    fitted_param[line_n]['min(y)'] = y.min()
    fitted_param[line_n]['c'] = c
    fitted_param[line_n]['mean_cum'] = y
    fitted_param[line_n]['split_time'] = x
    fitted_param[line_n]['y_fitted'] = y_fitted
    fitted_param[line_n]['r2'] = r2_score(y, y_fitted)

    panel = ax[ax_index]
    # The fitted curve is drawn at the observed time points only, in the
    # colour of the line.
    sns.lineplot(y=y_fitted, x=x, ax=panel, markers=True, hue=hue, palette=line_palette,
                 linewidth=5, legend=None)
    sns.scatterplot(y=y, x=x, ax=panel, markers=True, hue=hue, palette=line_palette, s=150, legend=None)
    panel.set_title(f'{line_n}', fontsize=40)
    panel.set_ylabel('Cumulative area (mm2)', fontsize=35)
    panel.set_xlabel('Time point', fontsize=35)
    # Size and rotate the tick labels through tick_params. Re-assigning the
    # current labels with set_xticklabels/set_yticklabels freezes their text
    # while the tick positions can still move when the layout changes.
    panel.tick_params(axis='x', rotation=90, labelsize=30)
    panel.tick_params(axis='y', labelsize=30)
    ax_index += 1

plt.tight_layout()
plt.savefig('./figures/Fitted_cum_area_per_split_orig_viz.svg', dpi=300, bbox_inches='tight')
Fitting failed for line_n CTL01A_5: Optimal parameters not found: The maximum number of function evaluations is exceeded.
# Rename CTL01A to CTL01 for the next figure: start from an identity mapping
# over the names present in the table, then add the overrides.
donor_map_names = {name: name for name in total_df['line']}
donor_map_names.update({
    'CHD2WT': 'UCSFi001-A',
    'CHD8WT': 'H9',
    'CTL01A': 'CTL01',
})
total_df['line'] = total_df['line'].map(donor_map_names)

# Rebuild the "<line>_<passage>" labels and their sorted list with the new name.
total_df['line_n_split'] = total_df['line'] + '_' + total_df['n_split'].astype('str')
order_line_n = sorted(total_df.line_n_split.unique().tolist())

# Same colours as before, keyed by the renamed line.
line_palette = {
    'CTL01': '#DBB807',
    'CTL08A': '#0FB248',
    'CTL04E': '#FF0054',
    'CTL02A': '#7B00FF',
    'H9': '#72190E',
    'H1': '#994F88',
    'CTL05A': '#1965B0',
    'CTL07C': '#437DBF',
    'CTL06F': '#CAE0AB',
    'CTL09A': '#FFFF00',
    'KTD8.2': '#E65518',
    'UCSFi001-A': '#7BAFDE',
}
fig, ax = plt.subplots(5, 9, figsize=(5*12, 8*5))
ax = ax.flatten()
ax_index = 0
fitted_param = {}


def exp_model(t, a, b, c):
    """Exponential curve ``a * exp(b * t) + c - a``.

    At ``t = 0`` the curve equals ``c``; ``b`` is the rate inside the
    exponential and ``a`` scales the exponential term.
    """
    return a * np.exp(b * t) + c - a


# Time grid (0..199) used only to draw the fitted curve.
t_viz = np.arange(200, dtype=np.float64)

for line_n in order_line_n:
    # Every line/passage keeps an entry; it stays empty when nothing is fitted.
    fitted_param[line_n] = {}
    sub = total_df[total_df.line_n_split == line_n]
    mean_area = sub.groupby('split_time')['Area (mm2)'].mean()
    # Only fit combinations with more than 5 distinct time points.
    if len(mean_area) <= 5:
        continue
    # Cumulative sum of the mean area per time point. x is read from the same
    # grouped index, so x and y always have the same length and order.
    y = mean_area.cumsum().values
    x = mean_area.index.values
    # Smallest strictly positive area at the first time point: used both as
    # the start value and as the lower bound of c.
    y_min_0 = sub[(sub.split_time == sub.split_time.min()) & (sub['Area (mm2)'] > 0)]['Area (mm2)'].min()
    hue = [line_n.split('_')[0]] * len(y)
    try:
        popt, pcov = curve_fit(exp_model, x, y, p0=(y.min(), 0.1, y_min_0),
                               bounds=((-np.inf, 0, y_min_0), (np.inf, np.inf, np.inf)))
    except (RuntimeError, ValueError) as e:
        # RuntimeError: the optimiser did not converge. ValueError: curve_fit
        # rejected the input (e.g. NaN data or start value). Either way this
        # panel is skipped instead of aborting the whole figure.
        print(f"Fitting failed for line_n {line_n}: {e}")
        continue

    a, b, c = popt
    y_fitted = exp_model(x, a, b, c)
    y_fitted_viz = exp_model(t_viz, a, b, c)
    try:
        msle = mean_squared_log_error(y, y_fitted)
    except ValueError:
        # sklearn refuses negative values, which the fitted curve can take.
        msle = np.nan

    fitted_param[line_n]['a'] = a
    fitted_param[line_n]['rate'] = b
    fitted_param[line_n]['min(y)'] = y.min()
    fitted_param[line_n]['c'] = c
    fitted_param[line_n]['mean_cum'] = y
    fitted_param[line_n]['split_time'] = x
    fitted_param[line_n]['y_fitted'] = y_fitted
    fitted_param[line_n]['MSLE'] = msle
    fitted_param[line_n]['r2'] = r2_score(y, y_fitted)

    panel = ax[ax_index]
    sns.lineplot(y=y_fitted_viz, x=t_viz, ax=panel)
    sns.scatterplot(y=y, x=x, ax=panel, markers=True, hue=hue, palette=line_palette, s=150, legend=None)
    panel.set_title(f'{line_n}', fontsize=40)
    panel.set_ylabel('Cumulative area (mm2)', fontsize=35)
    panel.set_xlabel('Time point', fontsize=35)
    panel.set_xlim(-10, 220)
    panel.set_ylim(-10, 85000)
    # Size and rotate the tick labels through tick_params. Re-assigning the
    # current labels with set_xticklabels/set_yticklabels freezes their text
    # while the tick positions can still move when the layout changes.
    panel.tick_params(axis='x', rotation=90, labelsize=30)
    panel.tick_params(axis='y', labelsize=30)
    ax_index += 1

plt.tight_layout()
plt.savefig('./figures/Fitted_cum_area_per_split_new_viz_new_name.svg', dpi=300, bbox_inches='tight')
Fitting failed for line_n CTL01_5: Optimal parameters not found: The maximum number of function evaluations is exceeded.
# Rename CTL01 back to CTL01A: start from an identity mapping over the names
# present in the table, then add the overrides.
donor_map_names = {name: name for name in total_df['line']}
donor_map_names.update({
    'CHD2WT': 'UCSFi001-A',
    'CHD8WT': 'H9',
    'CTL01': 'CTL01A',
})
total_df['line'] = total_df['line'].map(donor_map_names)

# Rebuild the "<line>_<passage>" labels and their sorted list with the restored name.
total_df['line_n_split'] = total_df['line'] + '_' + total_df['n_split'].astype('str')
order_line_n = sorted(total_df.line_n_split.unique().tolist())

# Same colours as before, keyed by the restored line name.
line_palette = {
    'CTL01A': '#DBB807',
    'CTL08A': '#0FB248',
    'CTL04E': '#FF0054',
    'CTL02A': '#7B00FF',
    'H9': '#72190E',
    'H1': '#994F88',
    'CTL05A': '#1965B0',
    'CTL07C': '#437DBF',
    'CTL06F': '#CAE0AB',
    'CTL09A': '#FFFF00',
    'KTD8.2': '#E65518',
    'UCSFi001-A': '#7BAFDE',
}
fig, ax = plt.subplots(5, 9, figsize=(5*12, 8*5))
ax = ax.flatten()
ax_index = 0
fitted_param = {}


def exp_model(t, a, b, c):
    """Exponential curve ``a * exp(b * t) + c - a``.

    At ``t = 0`` the curve equals ``c``; ``b`` is the rate inside the
    exponential and ``a`` scales the exponential term.
    """
    return a * np.exp(b * t) + c - a


for line_n in order_line_n:
    # Every line/passage keeps an entry; it stays empty when nothing is fitted.
    fitted_param[line_n] = {}
    sub = total_df[total_df.line_n_split == line_n]
    mean_area = sub.groupby('split_time')['Area (mm2)'].mean()
    # Only fit combinations with more than 5 distinct time points.
    if len(mean_area) <= 5:
        continue
    # Cumulative sum of the mean area per time point. x is read from the same
    # grouped index, so x and y always have the same length and order.
    y = mean_area.cumsum().values
    x = mean_area.index.values
    # Smallest strictly positive area at the first time point: used both as
    # the start value and as the lower bound of c.
    y_min_0 = sub[(sub.split_time == sub.split_time.min()) & (sub['Area (mm2)'] > 0)]['Area (mm2)'].min()
    hue = [line_n.split('_')[0]] * len(y)
    try:
        popt, pcov = curve_fit(exp_model, x, y, p0=(y.min(), 0.1, y_min_0),
                               bounds=((-np.inf, 0, y_min_0), (np.inf, np.inf, np.inf)))
    except (RuntimeError, ValueError) as e:
        # RuntimeError: the optimiser did not converge. ValueError: curve_fit
        # rejected the input (e.g. NaN data or start value). Either way this
        # panel is skipped instead of aborting the whole figure.
        print(f"Fitting failed for line_n {line_n}: {e}")
        continue

    a, b, c = popt
    y_fitted = exp_model(x, a, b, c)
    try:
        msle = mean_squared_log_error(y, y_fitted)
    except ValueError:
        # sklearn refuses negative values, which the fitted curve can take.
        msle = np.nan

    fitted_param[line_n]['a'] = a
    fitted_param[line_n]['rate'] = b
    fitted_param[line_n]['min(y)'] = y.min()
    fitted_param[line_n]['c'] = c
    fitted_param[line_n]['mean_cum'] = y
    fitted_param[line_n]['split_time'] = x
    fitted_param[line_n]['y_fitted'] = y_fitted
    fitted_param[line_n]['MSLE'] = msle
    fitted_param[line_n]['r2'] = r2_score(y, y_fitted)

    panel = ax[ax_index]
    # The fitted curve is drawn at the observed time points only, in the
    # colour of the line.
    sns.lineplot(y=y_fitted, x=x, ax=panel, markers=True, hue=hue, palette=line_palette,
                 linewidth=5, legend=None)
    sns.scatterplot(y=y, x=x, ax=panel, markers=True, hue=hue, palette=line_palette, s=150, legend=None)
    panel.set_title(f'{line_n}', fontsize=40)
    panel.set_ylabel('Cumulative area (mm2)', fontsize=35)
    panel.set_xlabel('Time point', fontsize=35)
    # Size and rotate the tick labels through tick_params. Re-assigning the
    # current labels with set_xticklabels/set_yticklabels freezes their text
    # while the tick positions can still move when the layout changes.
    panel.tick_params(axis='x', rotation=90, labelsize=30)
    panel.tick_params(axis='y', labelsize=30)
    ax_index += 1

plt.tight_layout()
plt.savefig('./figures/Fitted_cum_area_per_split_orig_viz_new_name.svg', dpi=300, bbox_inches='tight')
Fitting failed for line_n CTL01A_5: Optimal parameters not found: The maximum number of function evaluations is exceeded.
# Overlay the fitted cumulative-area curve of every line/n_split combination
# in a single figure, coloured by cell line.
sns.set(style="white", palette="Paired", color_codes=True)
fig, ax = plt.subplots(figsize=(7, 5))
labels = []     # legend text, one entry per line that has a fitted curve
lc = []         # colour of each legend entry
handles = []    # artist backing each legend entry
all_lines = {}  # "<line>_<n_split>" -> artist of the plotted fitted curve
total_df_no_first = total_df[total_df.n_split != '1'].copy()
for l in total_df.line.unique():
    color = line_palette[l]
    sub = total_df[total_df.line == l].sort_values(by='datetime')
    sub = sub[sub.n_split != 'day']
    # Reset per line so a line without any fitted curve cannot reuse the
    # artist left over from the previous line.
    line = None
    for split in sub.n_split.unique():
        subsub = sub[sub.n_split == split]
        if len(subsub.split_time.unique()) <= 5:
            # Not enough distinct time points to fit three parameters.
            continue
        # `preprocess` is a project helper not visible here; with
        # final_output='mean' it is read below as returning one row per
        # split_time with a 'mean' column -- presumably the mean area over
        # the fields of view at that time point (confirm in ImagingUtilities).
        summary_df = preprocess(subsub, original_v='Area (mm2)', final_output='mean')
        summary_df = summary_df.sort_values('split_time')
        summary_df['cumulative'] = summary_df['mean'].cumsum()
        farray = np.array(summary_df['cumulative'].values)  # cumulative area (fit target)
        y_pos = summary_df.split_time.values                 # time points (fit input)
        # Same failure handling as the per-panel fit: report and skip instead
        # of aborting the whole figure when the optimiser does not converge.
        try:
            popt, pcov = curve_fit(exp_model, y_pos, farray, p0=(max(farray), 0.1, 0))
        except RuntimeError as e:
            print(f"Fitting failed for line_n {l}_{split}: {e}")
            continue
        a, b, c = popt
        y_fitted = exp_model(y_pos, a, b, c)
        line, = ax.plot(y_pos, y_fitted, color=color, marker='.')
        all_lines[f'{l}_{split}'] = line
    # One legend entry per line, backed by its most recently drawn curve.
    # NOTE(review): the flattened source does not show whether these appends
    # sat in the outer or the inner loop; one entry per line is assumed.
    if line is not None:
        labels.append(l)
        handles.append(line)
        lc.append(color)
ax.xaxis.set_tick_params(rotation=90)
plt.legend(handles, labels, bbox_to_anchor=(1, 1))
<matplotlib.legend.Legend at 0x7f22c79c3d60>
# Hand the plotted curves to the project helper `highlight_growth_curves`
# (imported from ImagingUtilities; its behaviour is not visible here), then
# save the current figure.
lines = total_df['line'].unique()
highlight_growth_curves(
    all_lines,
    xlabel='Hours from split',
    ylabel='Cumulative area (mm2)',
    lines=lines,
    fontsize=20,
)
plt.savefig('./figures/cumulative_growth_curve_per_line.svg', dpi=300)
# One row per line/n_split identifier, one column per stored fit quantity.
fitted_df = pd.DataFrame.from_dict(fitted_param).T
# Identifiers have the form "<line>_<split>": the first two "_"-separated
# pieces become the `Line` and `split` columns.
fitted_df['Line'] = [name.split('_')[0] for name in fitted_df.index]
fitted_df['split'] = [name.split('_')[1] for name in fitted_df.index]
# Drop identifiers without a stored fit (their `a` is missing).
fitted_df = fitted_df[fitted_df['a'].notna()]
fitted_df['a+c'] = fitted_df['a'] + fitted_df['c']
# Number of fits with a positive a + c.
fitted_df.loc[fitted_df['a+c'] > 0].shape
(43, 12)
fitted_df
| | a | rate | min(y) | c | mean_cum | split_time | y_fitted | MSLE | r2 | Line | split | a+c |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CTL01A_2 | 53.590939 | 0.031439 | 48.98586 | 19.7823 | [48.98585999999999, 195.08369999999996, 285.83... | [49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 120.0] | [216.30031656044085, 287.82385334475316, 481.6... | 0.377437 | 0.955038 | CTL01A | 2 | 73.373239 |
| CTL01A_4 | 2552.990753 | 0.020925 | 407.73135 | 357.00324 | [407.73135, 650.817798, 1470.892803, 2865.8591... | [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0] | [357.00324, 822.2200679524135, 2111.6218893054... | 0.038346 | 0.958803 | CTL01A | 4 | 2909.993993 |
| CTL02A_1 | 516.84067 | 0.02225 | 405.32601 | 209.46054 | [405.32601, 534.56623, 880.29901, 1318.17853, ... | [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... | [923.4798879738361, 1196.3659463168015, 1792.1... | 0.202706 | 0.974893 | CTL02A | 1 | 726.30121 |
| CTL02A_2 | 363.236713 | 0.043378 | 481.002864 | 291.49878 | [481.00286399999993, 682.5998189999999, 1748.9... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.0] | [624.5182863894054, 957.028292531812, 1900.212... | 0.030689 | 0.992677 | CTL02A | 2 | 654.735493 |
| CTL02A_3 | 6321.707636 | 0.016347 | 627.73808 | 249.83244 | [627.73808, 992.027444, 1508.292068, 2111.0835... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [249.83244000000195, 788.2321280457018, 2694.5... | 0.180534 | 0.975446 | CTL02A | 3 | 6571.540076 |
| CTL02A_5 | 259.642996 | 0.037966 | 248.92647 | 223.89396 | [248.92647, 348.61422, 667.6209999999999, 927.... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [223.89396000008873, 440.8882405407524, 610.04... | 0.019593 | 0.983085 | CTL02A | 5 | 483.536956 |
| CTL04E_2 | 312.465185 | 0.024744 | 114.00318 | 114.00318 | [114.00317999999999, 419.92985999999996, 648.0... | [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... | [367.40032074991313, 508.5498721900076, 851.96... | 0.170421 | 0.985605 | CTL04E | 2 | 426.468365 |
| CTL04E_3 | 497.069779 | 0.023063 | 807.9486 | 807.9486 | [807.9485999999999, 925.2285899999999, 1018.98... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... | [1013.3974633872776, 1175.452778497335, 1532.7... | 0.102829 | 0.976291 | CTL04E | 3 | 1305.018379 |
| CTL04E_4 | 3657.729993 | 0.017852 | 463.8341 | 327.13452 | [463.8341, 808.1382349999999, 1096.15610299999... | [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... | [327.13452000000007, 888.649780863635, 2384.63... | 0.072583 | 0.990038 | CTL04E | 4 | 3984.864513 |
| CTL04E_5 | 1127.103565 | 0.037623 | 630.44932 | 320.99214 | [630.4493200000001, 855.2126800000001, 968.046... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [320.9921400000087, 1251.6365258470114, 1974.2... | 0.327523 | 0.835627 | CTL04E | 5 | 1448.095705 |
| CTL05A_1 | 316.567038 | 0.030813 | 296.488308 | 189.37602 | [296.48830799999996, 923.25726, 1722.042108, 2... | [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... | [925.6609348972702, 1262.1413093635813, 2078.4... | 0.143865 | 0.99813 | CTL05A | 1 | 505.943058 |
| CTL05A_2 | 21.167943 | 0.043012 | 78.093855 | 186.873482 | [78.09385499999999, 193.287147, 241.072407, 38... | [9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0, 96.0... | [196.87989118736408, 225.13412776210168, 253.2... | 0.075352 | 0.999672 | CTL05A | 2 | 208.041424 |
| CTL05A_3 | 967.663799 | 0.01896 | 544.203345 | 331.29384 | [544.2033449999999, 640.3876889999999, 671.980... | [0.0, 15.0, 23.0, 39.0, 47.0, 64.0, 72.0, 87.0... | [331.293840000807, 649.616394634156, 860.24249... | 0.123883 | 0.984739 | CTL05A | 3 | 1298.957639 |
| CTL06F_1 | 73.156567 | 0.038794 | 71.0148 | 56.74836 | [71.0148, 191.69752499999998, 376.873929, 754.... | [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.... | [56.74835999999999, 315.73823501966706, 454.52... | 0.035807 | 0.998507 | CTL06F | 1 | 129.904927 |
| CTL06F_2 | 68.093768 | 0.048632 | 84.18644 | 229.702436 | [84.18644, 202.243508, 373.50316399999997, 681... | [0.0, 9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0,... | [229.7024356845842, 267.094655263772, 380.3884... | 0.120596 | 0.998732 | CTL06F | 2 | 297.796204 |
| CTL06F_3 | 3463.1939 | 0.017721 | 317.817864 | 222.41874 | [317.817864, 537.075576, 993.100164, 1955.1224... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [222.41874000000007, 543.2838035123386, 1695.5... | 0.0539 | 0.9908 | CTL06F | 3 | 3685.61264 |
| CTL06F_5 | 577.234655 | 0.028446 | 378.69431 | 411.150264 | [378.69431000000003, 785.843438, 1021.701998, ... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [411.1502639146694, 743.8635617830383, 976.395... | 0.003022 | 0.996178 | CTL06F | 5 | 988.384919 |
| CTL07C_2 | 94.048429 | 0.031126 | 37.458996 | 14.91918 | [37.458996, 104.79609599999999, 185.987292, 28... | [9.0, 24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0... | [45.32533050619237, 119.37803266330768, 183.55... | 0.015759 | 0.990575 | CTL07C | 2 | 108.967609 |
| CTL07C_3 | 613.974584 | 0.035564 | 913.37922 | 784.329447 | [913.3792199999999, 1250.2052039999999, 1652.8... | [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] | [784.3294474004915, 1217.076290757244, 1611.93... | 0.00529 | 0.998319 | CTL07C | 3 | 1398.304031 |
| CTL07C_4 | 2306.799129 | 0.019101 | 182.26074 | 155.64468 | [182.26073999999997, 304.74484799999993, 570.8... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [155.64467999999988, 386.81439502233434, 1228.... | 0.196042 | 0.978949 | CTL07C | 4 | 2462.443809 |
| CTL07C_6 | 1088.712664 | 0.030355 | 273.1388 | 161.54142 | [273.1388, 520.0346, 1032.9212599999998, 2528.... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [161.54142000000002, 842.2836199395206, 1328.6... | 0.09787 | 0.982106 | CTL07C | 6 | 1250.254084 |
| CTL08A_1 | 451.677536 | 0.033521 | 127.16148 | 39.47076 | [127.16147999999998, 431.94275999999996, 1240.... | [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] | [39.470760000000155, 1257.290524072777, 1845.1... | 0.335793 | 0.980946 | CTL08A | 1 | 491.148296 |
| CTL08A_2 | 268.823282 | 0.033213 | 62.86314 | 45.72906 | [62.86313999999999, 162.41026799999997, 252.89... | [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... | [45.72906000000023, 219.31865687265957, 373.45... | 0.07015 | 0.996042 | CTL08A | 2 | 314.552342 |
| CTL08A_3 | 1710.611504 | 0.021725 | 140.72619 | 62.59404 | [140.72618999999997, 259.12742999999995, 598.6... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [62.594040000002224, 258.8779884482817, 993.49... | 0.258221 | 0.973682 | CTL08A | 3 | 1773.205544 |
| CTL08A_5 | 11488.018902 | 0.003436 | 273.38996 | 274.289889 | [273.38996000000003, 314.30944400000004, 1437.... | [0.0, 16.0, 17.0, 24.0, 39.0, 48.0, 63.0] | [274.28988906899576, 923.5431498814814, 965.31... | 0.194679 | 0.886176 | CTL08A | 5 | 11762.308791 |
| CTL09A_2 | 100.26601 | 0.036516 | 158.31084 | 124.0827 | [158.31083999999998, 305.1929799999999, 625.27... | [33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 12... | [358.39217444033267, 623.9322771951977, 827.53... | 0.165134 | 0.9842 | CTL09A | 2 | 224.34871 |
| CTL09A_3 | 1018.919691 | 0.028298 | 416.533404 | 178.572 | [416.53340399999996, 845.522964, 1353.60639600... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... | [717.3621667745616, 1169.182503451074, 2231.79... | 0.091367 | 0.98176 | CTL09A | 3 | 1197.491691 |
| CTL09A_4 | 1424.537893 | 0.024477 | 325.85664 | 325.85664 | [325.85663999999997, 564.9259079999999, 1003.4... | [9.0, 24.0, 32.0, 48.0, 56.0, 73.0, 81.0, 96.0... | [676.9175265747774, 1464.5997812217104, 2019.0... | 0.19694 | 0.990837 | CTL09A | 4 | 1750.394533 |
| CTL09A_5 | 245.43844 | 0.044788 | 380.20518 | 522.094091 | [380.20518, 567.604212, 948.6578159999999, 174... | [0.0, 8.0, 24.0, 32.0, 47.0, 56.0, 71.0] | [522.0940905069889, 627.8547624096685, 995.732... | 0.029391 | 0.98726 | CTL09A | 5 | 767.532531 |
| H1_1 | 335.579505 | 0.022767 | 48.570756 | 12.51798 | [48.570755999999996, 109.48395599999999, 128.7... | [9.0, 24.0, 33.0, 48.0, 57.0, 72.0, 83.0, 96.0... | [88.83137005064896, 256.4993862718479, 388.296... | 0.418782 | 0.98154 | H1 | 1 | 348.097485 |
| H1_2 | 1751.44934 | 0.013332 | 940.95162 | 940.95162 | [940.9516199999999, 1109.298648, 1189.804168, ... | [0.0, 15.0, 39.0, 47.0, 64.0, 72.0, 87.0, 95.0... | [940.9516200004766, 1328.6959694906852, 2135.3... | 0.161432 | 0.957078 | H1 | 2 | 2692.40096 |
| H9_1 | 1628.27244 | 0.024254 | 143.7201 | 133.4598 | [143.72009999999997, 367.2745799999999, 947.59... | [0.0, 15.0, 24.0, 40.0, 48.0, 63.0, 72.0, 87.0... | [133.45980000082363, 847.9387957534793, 1419.4... | 0.102503 | 0.986148 | H9 | 1 | 1761.73224 |
| H9_2 | 250.804206 | 0.045415 | 114.8643 | 58.4637 | [114.86429999999999, 704.2667159999999, 1200.7... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] | [303.32447520829385, 553.5933001556893, 1281.8... | 0.142488 | 0.999084 | H9 | 2 | 309.267906 |
| H9_3 | 4369.765458 | 0.015581 | 344.70882 | 114.95124 | [344.70881999999995, 429.92740799999996, 542.4... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [114.95124000000305, 468.9929864609403, 1712.7... | 0.344004 | 0.975495 | H9 | 3 | 4484.716698 |
| H9_5 | 481.000261 | 0.023808 | 308.090865 | 286.856808 | [308.090865, 343.77042, 682.5535199999999, 935... | [0.0, 16.0, 17.0, 39.0, 48.0, 63.0] | [286.856807514086, 509.8663602418136, 526.8283... | 0.03998 | 0.965619 | H9 | 5 | 767.857069 |
| KTD8.2_1 | 90.861505 | 0.041092 | 209.4633 | 209.4633 | [209.46329999999998, 355.660845, 580.268264999... | [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] | [209.46330000000057, 569.8131332224608, 771.72... | 0.04646 | 0.986486 | KTD8.2 | 1 | 300.324805 |
| KTD8.2_2 | 143.611111 | 0.039801 | 271.62816 | 667.25949 | [271.62816, 429.56363999999996, 1188.767879999... | [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... | [667.2594903693514, 784.5436633996587, 896.927... | 0.133148 | 0.987279 | KTD8.2 | 2 | 810.870602 |
| KTD8.2_3 | 786.554401 | 0.018607 | 40.886088 | 21.48798 | [40.886087999999994, 148.963548, 230.079948, 3... | [9.0, 28.0, 33.0, 48.0, 57.0, 72.0, 80.0, 96.0... | [164.8807600222509, 559.2701541222905, 688.395... | 0.490549 | 0.988663 | KTD8.2 | 3 | 808.042381 |
| KTD8.2_4 | 642.619191 | 0.026211 | 539.310133 | 512.57997 | [539.3101333333333, 824.4388333333333, 1098.55... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [512.5799696858405, 847.3969548514299, 1075.42... | 0.003678 | 0.987125 | KTD8.2 | 4 | 1155.19916 |
| UCSFi001-A_2 | 223.991762 | 0.026581 | 179.12193 | 175.46148 | [179.12193, 422.4456, 664.010115, 768.59238, 1... | [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... | [375.3944790902663, 489.9689160250534, 775.398... | 0.097114 | 0.970674 | UCSFi001-A | 2 | 399.453242 |
| UCSFi001-A_3 | 406.117615 | 0.025661 | 313.51438 | 280.08756 | [313.51438, 394.810525, 511.56956499999995, 92... | [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0... | [280.0875600000011, 470.7593523383308, 625.805... | 0.013113 | 0.995604 | UCSFi001-A | 3 | 686.205175 |
| UCSFi001-A_4 | 4524.912278 | 0.016863 | 250.04634 | 250.04634 | [250.04633999999996, 519.1565519999999, 1278.6... | [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... | [250.0463400000417, 903.5736384696875, 2622.76... | 0.084851 | 0.99402 | UCSFi001-A | 4 | 4774.958618 |
| UCSFi001-A_5 | 264.778941 | 0.055867 | 675.93918 | 505.45398 | [675.93918, 889.3241639999999, 1171.628004, 18... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [505.4539800000207, 887.9480350639889, 1252.69... | 0.036406 | 0.981117 | UCSFi001-A | 5 | 770.232921 |
# Number of fits with a negative a + c.
fitted_df.loc[fitted_df['a+c'] < 0].shape
(0, 12)
# Keep only the fits whose r2 exceeds 0.9. The explicit copy makes the
# filtered frame independent of `fitted_df`, so the later in-place conversion
# of its `split` column neither touches the full table nor depends on
# pandas' view-versus-copy behaviour.
fitted_df_filtered = fitted_df[fitted_df['r2'] > 0.9].copy()
fitted_df_filtered
| | a | rate | min(y) | c | mean_cum | split_time | y_fitted | MSLE | r2 | Line | split | a+c |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CTL01A_2 | 53.590939 | 0.031439 | 48.98586 | 19.7823 | [48.98585999999999, 195.08369999999996, 285.83... | [49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 120.0] | [216.30031656044085, 287.82385334475316, 481.6... | 0.377437 | 0.955038 | CTL01A | 2 | 73.373239 |
| CTL01A_4 | 2552.990753 | 0.020925 | 407.73135 | 357.00324 | [407.73135, 650.817798, 1470.892803, 2865.8591... | [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0] | [357.00324, 822.2200679524135, 2111.6218893054... | 0.038346 | 0.958803 | CTL01A | 4 | 2909.993993 |
| CTL02A_1 | 516.84067 | 0.02225 | 405.32601 | 209.46054 | [405.32601, 534.56623, 880.29901, 1318.17853, ... | [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... | [923.4798879738361, 1196.3659463168015, 1792.1... | 0.202706 | 0.974893 | CTL02A | 1 | 726.30121 |
| CTL02A_2 | 363.236713 | 0.043378 | 481.002864 | 291.49878 | [481.00286399999993, 682.5998189999999, 1748.9... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.0] | [624.5182863894054, 957.028292531812, 1900.212... | 0.030689 | 0.992677 | CTL02A | 2 | 654.735493 |
| CTL02A_3 | 6321.707636 | 0.016347 | 627.73808 | 249.83244 | [627.73808, 992.027444, 1508.292068, 2111.0835... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [249.83244000000195, 788.2321280457018, 2694.5... | 0.180534 | 0.975446 | CTL02A | 3 | 6571.540076 |
| CTL02A_5 | 259.642996 | 0.037966 | 248.92647 | 223.89396 | [248.92647, 348.61422, 667.6209999999999, 927.... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [223.89396000008873, 440.8882405407524, 610.04... | 0.019593 | 0.983085 | CTL02A | 5 | 483.536956 |
| CTL04E_2 | 312.465185 | 0.024744 | 114.00318 | 114.00318 | [114.00317999999999, 419.92985999999996, 648.0... | [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... | [367.40032074991313, 508.5498721900076, 851.96... | 0.170421 | 0.985605 | CTL04E | 2 | 426.468365 |
| CTL04E_3 | 497.069779 | 0.023063 | 807.9486 | 807.9486 | [807.9485999999999, 925.2285899999999, 1018.98... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... | [1013.3974633872776, 1175.452778497335, 1532.7... | 0.102829 | 0.976291 | CTL04E | 3 | 1305.018379 |
| CTL04E_4 | 3657.729993 | 0.017852 | 463.8341 | 327.13452 | [463.8341, 808.1382349999999, 1096.15610299999... | [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... | [327.13452000000007, 888.649780863635, 2384.63... | 0.072583 | 0.990038 | CTL04E | 4 | 3984.864513 |
| CTL05A_1 | 316.567038 | 0.030813 | 296.488308 | 189.37602 | [296.48830799999996, 923.25726, 1722.042108, 2... | [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... | [925.6609348972702, 1262.1413093635813, 2078.4... | 0.143865 | 0.99813 | CTL05A | 1 | 505.943058 |
| CTL05A_2 | 21.167943 | 0.043012 | 78.093855 | 186.873482 | [78.09385499999999, 193.287147, 241.072407, 38... | [9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0, 96.0... | [196.87989118736408, 225.13412776210168, 253.2... | 0.075352 | 0.999672 | CTL05A | 2 | 208.041424 |
| CTL05A_3 | 967.663799 | 0.01896 | 544.203345 | 331.29384 | [544.2033449999999, 640.3876889999999, 671.980... | [0.0, 15.0, 23.0, 39.0, 47.0, 64.0, 72.0, 87.0... | [331.293840000807, 649.616394634156, 860.24249... | 0.123883 | 0.984739 | CTL05A | 3 | 1298.957639 |
| CTL06F_1 | 73.156567 | 0.038794 | 71.0148 | 56.74836 | [71.0148, 191.69752499999998, 376.873929, 754.... | [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.... | [56.74835999999999, 315.73823501966706, 454.52... | 0.035807 | 0.998507 | CTL06F | 1 | 129.904927 |
| CTL06F_2 | 68.093768 | 0.048632 | 84.18644 | 229.702436 | [84.18644, 202.243508, 373.50316399999997, 681... | [0.0, 9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0,... | [229.7024356845842, 267.094655263772, 380.3884... | 0.120596 | 0.998732 | CTL06F | 2 | 297.796204 |
| CTL06F_3 | 3463.1939 | 0.017721 | 317.817864 | 222.41874 | [317.817864, 537.075576, 993.100164, 1955.1224... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [222.41874000000007, 543.2838035123386, 1695.5... | 0.0539 | 0.9908 | CTL06F | 3 | 3685.61264 |
| CTL06F_5 | 577.234655 | 0.028446 | 378.69431 | 411.150264 | [378.69431000000003, 785.843438, 1021.701998, ... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [411.1502639146694, 743.8635617830383, 976.395... | 0.003022 | 0.996178 | CTL06F | 5 | 988.384919 |
| CTL07C_2 | 94.048429 | 0.031126 | 37.458996 | 14.91918 | [37.458996, 104.79609599999999, 185.987292, 28... | [9.0, 24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0... | [45.32533050619237, 119.37803266330768, 183.55... | 0.015759 | 0.990575 | CTL07C | 2 | 108.967609 |
| CTL07C_3 | 613.974584 | 0.035564 | 913.37922 | 784.329447 | [913.3792199999999, 1250.2052039999999, 1652.8... | [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] | [784.3294474004915, 1217.076290757244, 1611.93... | 0.00529 | 0.998319 | CTL07C | 3 | 1398.304031 |
| CTL07C_4 | 2306.799129 | 0.019101 | 182.26074 | 155.64468 | [182.26073999999997, 304.74484799999993, 570.8... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [155.64467999999988, 386.81439502233434, 1228.... | 0.196042 | 0.978949 | CTL07C | 4 | 2462.443809 |
| CTL07C_6 | 1088.712664 | 0.030355 | 273.1388 | 161.54142 | [273.1388, 520.0346, 1032.9212599999998, 2528.... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [161.54142000000002, 842.2836199395206, 1328.6... | 0.09787 | 0.982106 | CTL07C | 6 | 1250.254084 |
| CTL08A_1 | 451.677536 | 0.033521 | 127.16148 | 39.47076 | [127.16147999999998, 431.94275999999996, 1240.... | [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] | [39.470760000000155, 1257.290524072777, 1845.1... | 0.335793 | 0.980946 | CTL08A | 1 | 491.148296 |
| CTL08A_2 | 268.823282 | 0.033213 | 62.86314 | 45.72906 | [62.86313999999999, 162.41026799999997, 252.89... | [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... | [45.72906000000023, 219.31865687265957, 373.45... | 0.07015 | 0.996042 | CTL08A | 2 | 314.552342 |
| CTL08A_3 | 1710.611504 | 0.021725 | 140.72619 | 62.59404 | [140.72618999999997, 259.12742999999995, 598.6... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [62.594040000002224, 258.8779884482817, 993.49... | 0.258221 | 0.973682 | CTL08A | 3 | 1773.205544 |
| CTL09A_2 | 100.26601 | 0.036516 | 158.31084 | 124.0827 | [158.31083999999998, 305.1929799999999, 625.27... | [33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 12... | [358.39217444033267, 623.9322771951977, 827.53... | 0.165134 | 0.9842 | CTL09A | 2 | 224.34871 |
| CTL09A_3 | 1018.919691 | 0.028298 | 416.533404 | 178.572 | [416.53340399999996, 845.522964, 1353.60639600... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... | [717.3621667745616, 1169.182503451074, 2231.79... | 0.091367 | 0.98176 | CTL09A | 3 | 1197.491691 |
| CTL09A_4 | 1424.537893 | 0.024477 | 325.85664 | 325.85664 | [325.85663999999997, 564.9259079999999, 1003.4... | [9.0, 24.0, 32.0, 48.0, 56.0, 73.0, 81.0, 96.0... | [676.9175265747774, 1464.5997812217104, 2019.0... | 0.19694 | 0.990837 | CTL09A | 4 | 1750.394533 |
| CTL09A_5 | 245.43844 | 0.044788 | 380.20518 | 522.094091 | [380.20518, 567.604212, 948.6578159999999, 174... | [0.0, 8.0, 24.0, 32.0, 47.0, 56.0, 71.0] | [522.0940905069889, 627.8547624096685, 995.732... | 0.029391 | 0.98726 | CTL09A | 5 | 767.532531 |
| H1_1 | 335.579505 | 0.022767 | 48.570756 | 12.51798 | [48.570755999999996, 109.48395599999999, 128.7... | [9.0, 24.0, 33.0, 48.0, 57.0, 72.0, 83.0, 96.0... | [88.83137005064896, 256.4993862718479, 388.296... | 0.418782 | 0.98154 | H1 | 1 | 348.097485 |
| H1_2 | 1751.44934 | 0.013332 | 940.95162 | 940.95162 | [940.9516199999999, 1109.298648, 1189.804168, ... | [0.0, 15.0, 39.0, 47.0, 64.0, 72.0, 87.0, 95.0... | [940.9516200004766, 1328.6959694906852, 2135.3... | 0.161432 | 0.957078 | H1 | 2 | 2692.40096 |
| H9_1 | 1628.27244 | 0.024254 | 143.7201 | 133.4598 | [143.72009999999997, 367.2745799999999, 947.59... | [0.0, 15.0, 24.0, 40.0, 48.0, 63.0, 72.0, 87.0... | [133.45980000082363, 847.9387957534793, 1419.4... | 0.102503 | 0.986148 | H9 | 1 | 1761.73224 |
| H9_2 | 250.804206 | 0.045415 | 114.8643 | 58.4637 | [114.86429999999999, 704.2667159999999, 1200.7... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] | [303.32447520829385, 553.5933001556893, 1281.8... | 0.142488 | 0.999084 | H9 | 2 | 309.267906 |
| H9_3 | 4369.765458 | 0.015581 | 344.70882 | 114.95124 | [344.70881999999995, 429.92740799999996, 542.4... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [114.95124000000305, 468.9929864609403, 1712.7... | 0.344004 | 0.975495 | H9 | 3 | 4484.716698 |
| H9_5 | 481.000261 | 0.023808 | 308.090865 | 286.856808 | [308.090865, 343.77042, 682.5535199999999, 935... | [0.0, 16.0, 17.0, 39.0, 48.0, 63.0] | [286.856807514086, 509.8663602418136, 526.8283... | 0.03998 | 0.965619 | H9 | 5 | 767.857069 |
| KTD8.2_1 | 90.861505 | 0.041092 | 209.4633 | 209.4633 | [209.46329999999998, 355.660845, 580.268264999... | [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] | [209.46330000000057, 569.8131332224608, 771.72... | 0.04646 | 0.986486 | KTD8.2 | 1 | 300.324805 |
| KTD8.2_2 | 143.611111 | 0.039801 | 271.62816 | 667.25949 | [271.62816, 429.56363999999996, 1188.767879999... | [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... | [667.2594903693514, 784.5436633996587, 896.927... | 0.133148 | 0.987279 | KTD8.2 | 2 | 810.870602 |
| KTD8.2_3 | 786.554401 | 0.018607 | 40.886088 | 21.48798 | [40.886087999999994, 148.963548, 230.079948, 3... | [9.0, 28.0, 33.0, 48.0, 57.0, 72.0, 80.0, 96.0... | [164.8807600222509, 559.2701541222905, 688.395... | 0.490549 | 0.988663 | KTD8.2 | 3 | 808.042381 |
| KTD8.2_4 | 642.619191 | 0.026211 | 539.310133 | 512.57997 | [539.3101333333333, 824.4388333333333, 1098.55... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [512.5799696858405, 847.3969548514299, 1075.42... | 0.003678 | 0.987125 | KTD8.2 | 4 | 1155.19916 |
| UCSFi001-A_2 | 223.991762 | 0.026581 | 179.12193 | 175.46148 | [179.12193, 422.4456, 664.010115, 768.59238, 1... | [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... | [375.3944790902663, 489.9689160250534, 775.398... | 0.097114 | 0.970674 | UCSFi001-A | 2 | 399.453242 |
| UCSFi001-A_3 | 406.117615 | 0.025661 | 313.51438 | 280.08756 | [313.51438, 394.810525, 511.56956499999995, 92... | [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0... | [280.0875600000011, 470.7593523383308, 625.805... | 0.013113 | 0.995604 | UCSFi001-A | 3 | 686.205175 |
| UCSFi001-A_4 | 4524.912278 | 0.016863 | 250.04634 | 250.04634 | [250.04633999999996, 519.1565519999999, 1278.6... | [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... | [250.0463400000417, 903.5736384696875, 2622.76... | 0.084851 | 0.99402 | UCSFi001-A | 4 | 4774.958618 |
| UCSFi001-A_5 | 264.778941 | 0.055867 | 675.93918 | 505.45398 | [675.93918, 889.3241639999999, 1171.628004, 18... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [505.4539800000207, 887.9480350639889, 1252.69... | 0.036406 | 0.981117 | UCSFi001-A | 5 | 770.232921 |
# Bar plot of the fitted rate per passage. `split` still holds text labels
# here, hence the string values in `order`.
fig, ax = plt.subplots()
sns.barplot(data=fitted_df_filtered, x='split', y='rate',
            order=['1', '2', '3', '4', '5', '6'], ax=ax)
_ = ax.set_ylabel('Growth rate', fontsize=20)
_ = ax.set_xlabel('Passage', fontsize=20)
# Style the tick labels via tick_params: re-assigning get_*ticklabels() would
# freeze the label text of the numeric y axis at its current ticks.
ax.tick_params(axis='both', labelsize=15)
# Fitted rate across passages, one trace per cell line.
fig, ax = plt.subplots(figsize=(10, 4))
# Convert the passage labels to integers so they are placed on a numeric axis.
fitted_df_filtered['split'] = fitted_df_filtered['split'].astype('int')
sns.lineplot(data=fitted_df_filtered, x='split', y='rate', hue='Line',
             ax=ax, palette=line_palette)
_ = ax.set_ylabel('Growth rate', fontsize=20)
_ = ax.set_xlabel('Passage', fontsize=20)
# Both axes are numeric: tick_params resizes the labels without freezing
# their text the way set_*ticklabels(get_*ticklabels()) does.
ax.tick_params(axis='both', labelsize=15)
ax.legend(bbox_to_anchor=(1, 1))
<matplotlib.legend.Legend at 0x7f22c57028f0>
# Distribution of the fitted rate for each cell line.
fig, ax = plt.subplots()
sns.boxplot(data=fitted_df_filtered, x='Line', y='rate', ax=ax, palette=line_palette)
_ = ax.set_ylabel('Rate of area growth', fontsize=20)
# The x axis shows the `Line` column, so it is labelled as cell line rather
# than passage.
_ = ax.set_xlabel('Cell line', fontsize=20)
# Resize (and, for x, rotate) the tick labels without freezing their text.
ax.tick_params(axis='y', labelsize=15)
ax.tick_params(axis='x', labelsize=15, labelrotation=90)
# Sort the retained fits by increasing rate once, write them to CSV, and
# display the same sorted table.
fitted_sorted_by_rate = fitted_df_filtered.sort_values(by='rate')
fitted_sorted_by_rate.to_csv('../../data/csv/iPSC_fitted_exp_area_sum_per_split.csv')
fitted_sorted_by_rate
| | a | rate | min(y) | c | mean_cum | split_time | y_fitted | MSLE | r2 | Line | split | a+c |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| H1_2 | 1751.44934 | 0.013332 | 940.95162 | 940.95162 | [940.9516199999999, 1109.298648, 1189.804168, ... | [0.0, 15.0, 39.0, 47.0, 64.0, 72.0, 87.0, 95.0... | [940.9516200004766, 1328.6959694906852, 2135.3... | 0.161432 | 0.957078 | H1 | 2 | 2692.40096 |
| H9_3 | 4369.765458 | 0.015581 | 344.70882 | 114.95124 | [344.70881999999995, 429.92740799999996, 542.4... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [114.95124000000305, 468.9929864609403, 1712.7... | 0.344004 | 0.975495 | H9 | 3 | 4484.716698 |
| CTL02A_3 | 6321.707636 | 0.016347 | 627.73808 | 249.83244 | [627.73808, 992.027444, 1508.292068, 2111.0835... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [249.83244000000195, 788.2321280457018, 2694.5... | 0.180534 | 0.975446 | CTL02A | 3 | 6571.540076 |
| UCSFi001-A_4 | 4524.912278 | 0.016863 | 250.04634 | 250.04634 | [250.04633999999996, 519.1565519999999, 1278.6... | [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... | [250.0463400000417, 903.5736384696875, 2622.76... | 0.084851 | 0.99402 | UCSFi001-A | 4 | 4774.958618 |
| CTL06F_3 | 3463.1939 | 0.017721 | 317.817864 | 222.41874 | [317.817864, 537.075576, 993.100164, 1955.1224... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [222.41874000000007, 543.2838035123386, 1695.5... | 0.0539 | 0.9908 | CTL06F | 3 | 3685.61264 |
| CTL04E_4 | 3657.729993 | 0.017852 | 463.8341 | 327.13452 | [463.8341, 808.1382349999999, 1096.15610299999... | [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... | [327.13452000000007, 888.649780863635, 2384.63... | 0.072583 | 0.990038 | CTL04E | 4 | 3984.864513 |
| KTD8.2_3 | 786.554401 | 0.018607 | 40.886088 | 21.48798 | [40.886087999999994, 148.963548, 230.079948, 3... | [9.0, 28.0, 33.0, 48.0, 57.0, 72.0, 80.0, 96.0... | [164.8807600222509, 559.2701541222905, 688.395... | 0.490549 | 0.988663 | KTD8.2 | 3 | 808.042381 |
| CTL05A_3 | 967.663799 | 0.01896 | 544.203345 | 331.29384 | [544.2033449999999, 640.3876889999999, 671.980... | [0.0, 15.0, 23.0, 39.0, 47.0, 64.0, 72.0, 87.0... | [331.293840000807, 649.616394634156, 860.24249... | 0.123883 | 0.984739 | CTL05A | 3 | 1298.957639 |
| CTL07C_4 | 2306.799129 | 0.019101 | 182.26074 | 155.64468 | [182.26073999999997, 304.74484799999993, 570.8... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [155.64467999999988, 386.81439502233434, 1228.... | 0.196042 | 0.978949 | CTL07C | 4 | 2462.443809 |
| CTL01A_4 | 2552.990753 | 0.020925 | 407.73135 | 357.00324 | [407.73135, 650.817798, 1470.892803, 2865.8591... | [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0] | [357.00324, 822.2200679524135, 2111.6218893054... | 0.038346 | 0.958803 | CTL01A | 4 | 2909.993993 |
| CTL08A_3 | 1710.611504 | 0.021725 | 140.72619 | 62.59404 | [140.72618999999997, 259.12742999999995, 598.6... | [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... | [62.594040000002224, 258.8779884482817, 993.49... | 0.258221 | 0.973682 | CTL08A | 3 | 1773.205544 |
| CTL02A_1 | 516.84067 | 0.02225 | 405.32601 | 209.46054 | [405.32601, 534.56623, 880.29901, 1318.17853, ... | [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... | [923.4798879738361, 1196.3659463168015, 1792.1... | 0.202706 | 0.974893 | CTL02A | 1 | 726.30121 |
| H1_1 | 335.579505 | 0.022767 | 48.570756 | 12.51798 | [48.570755999999996, 109.48395599999999, 128.7... | [9.0, 24.0, 33.0, 48.0, 57.0, 72.0, 83.0, 96.0... | [88.83137005064896, 256.4993862718479, 388.296... | 0.418782 | 0.98154 | H1 | 1 | 348.097485 |
| CTL04E_3 | 497.069779 | 0.023063 | 807.9486 | 807.9486 | [807.9485999999999, 925.2285899999999, 1018.98... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... | [1013.3974633872776, 1175.452778497335, 1532.7... | 0.102829 | 0.976291 | CTL04E | 3 | 1305.018379 |
| H9_5 | 481.000261 | 0.023808 | 308.090865 | 286.856808 | [308.090865, 343.77042, 682.5535199999999, 935... | [0.0, 16.0, 17.0, 39.0, 48.0, 63.0] | [286.856807514086, 509.8663602418136, 526.8283... | 0.03998 | 0.965619 | H9 | 5 | 767.857069 |
| H9_1 | 1628.27244 | 0.024254 | 143.7201 | 133.4598 | [143.72009999999997, 367.2745799999999, 947.59... | [0.0, 15.0, 24.0, 40.0, 48.0, 63.0, 72.0, 87.0... | [133.45980000082363, 847.9387957534793, 1419.4... | 0.102503 | 0.986148 | H9 | 1 | 1761.73224 |
| CTL09A_4 | 1424.537893 | 0.024477 | 325.85664 | 325.85664 | [325.85663999999997, 564.9259079999999, 1003.4... | [9.0, 24.0, 32.0, 48.0, 56.0, 73.0, 81.0, 96.0... | [676.9175265747774, 1464.5997812217104, 2019.0... | 0.19694 | 0.990837 | CTL09A | 4 | 1750.394533 |
| CTL04E_2 | 312.465185 | 0.024744 | 114.00318 | 114.00318 | [114.00317999999999, 419.92985999999996, 648.0... | [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... | [367.40032074991313, 508.5498721900076, 851.96... | 0.170421 | 0.985605 | CTL04E | 2 | 426.468365 |
| UCSFi001-A_3 | 406.117615 | 0.025661 | 313.51438 | 280.08756 | [313.51438, 394.810525, 511.56956499999995, 92... | [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0... | [280.0875600000011, 470.7593523383308, 625.805... | 0.013113 | 0.995604 | UCSFi001-A | 3 | 686.205175 |
| KTD8.2_4 | 642.619191 | 0.026211 | 539.310133 | 512.57997 | [539.3101333333333, 824.4388333333333, 1098.55... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [512.5799696858405, 847.3969548514299, 1075.42... | 0.003678 | 0.987125 | KTD8.2 | 4 | 1155.19916 |
| UCSFi001-A_2 | 223.991762 | 0.026581 | 179.12193 | 175.46148 | [179.12193, 422.4456, 664.010115, 768.59238, 1... | [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... | [375.3944790902663, 489.9689160250534, 775.398... | 0.097114 | 0.970674 | UCSFi001-A | 2 | 399.453242 |
| CTL09A_3 | 1018.919691 | 0.028298 | 416.533404 | 178.572 | [416.53340399999996, 845.522964, 1353.60639600... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... | [717.3621667745616, 1169.182503451074, 2231.79... | 0.091367 | 0.98176 | CTL09A | 3 | 1197.491691 |
| CTL06F_5 | 577.234655 | 0.028446 | 378.69431 | 411.150264 | [378.69431000000003, 785.843438, 1021.701998, ... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [411.1502639146694, 743.8635617830383, 976.395... | 0.003022 | 0.996178 | CTL06F | 5 | 988.384919 |
| CTL07C_6 | 1088.712664 | 0.030355 | 273.1388 | 161.54142 | [273.1388, 520.0346, 1032.9212599999998, 2528.... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [161.54142000000002, 842.2836199395206, 1328.6... | 0.09787 | 0.982106 | CTL07C | 6 | 1250.254084 |
| CTL05A_1 | 316.567038 | 0.030813 | 296.488308 | 189.37602 | [296.48830799999996, 923.25726, 1722.042108, 2... | [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... | [925.6609348972702, 1262.1413093635813, 2078.4... | 0.143865 | 0.99813 | CTL05A | 1 | 505.943058 |
| CTL07C_2 | 94.048429 | 0.031126 | 37.458996 | 14.91918 | [37.458996, 104.79609599999999, 185.987292, 28... | [9.0, 24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0... | [45.32533050619237, 119.37803266330768, 183.55... | 0.015759 | 0.990575 | CTL07C | 2 | 108.967609 |
| CTL01A_2 | 53.590939 | 0.031439 | 48.98586 | 19.7823 | [48.98585999999999, 195.08369999999996, 285.83... | [49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 120.0] | [216.30031656044085, 287.82385334475316, 481.6... | 0.377437 | 0.955038 | CTL01A | 2 | 73.373239 |
| CTL08A_2 | 268.823282 | 0.033213 | 62.86314 | 45.72906 | [62.86313999999999, 162.41026799999997, 252.89... | [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... | [45.72906000000023, 219.31865687265957, 373.45... | 0.07015 | 0.996042 | CTL08A | 2 | 314.552342 |
| CTL08A_1 | 451.677536 | 0.033521 | 127.16148 | 39.47076 | [127.16147999999998, 431.94275999999996, 1240.... | [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] | [39.470760000000155, 1257.290524072777, 1845.1... | 0.335793 | 0.980946 | CTL08A | 1 | 491.148296 |
| CTL07C_3 | 613.974584 | 0.035564 | 913.37922 | 784.329447 | [913.3792199999999, 1250.2052039999999, 1652.8... | [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] | [784.3294474004915, 1217.076290757244, 1611.93... | 0.00529 | 0.998319 | CTL07C | 3 | 1398.304031 |
| CTL09A_2 | 100.26601 | 0.036516 | 158.31084 | 124.0827 | [158.31083999999998, 305.1929799999999, 625.27... | [33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 12... | [358.39217444033267, 623.9322771951977, 827.53... | 0.165134 | 0.9842 | CTL09A | 2 | 224.34871 |
| CTL02A_5 | 259.642996 | 0.037966 | 248.92647 | 223.89396 | [248.92647, 348.61422, 667.6209999999999, 927.... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [223.89396000008873, 440.8882405407524, 610.04... | 0.019593 | 0.983085 | CTL02A | 5 | 483.536956 |
| CTL06F_1 | 73.156567 | 0.038794 | 71.0148 | 56.74836 | [71.0148, 191.69752499999998, 376.873929, 754.... | [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.... | [56.74835999999999, 315.73823501966706, 454.52... | 0.035807 | 0.998507 | CTL06F | 1 | 129.904927 |
| KTD8.2_2 | 143.611111 | 0.039801 | 271.62816 | 667.25949 | [271.62816, 429.56363999999996, 1188.767879999... | [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... | [667.2594903693514, 784.5436633996587, 896.927... | 0.133148 | 0.987279 | KTD8.2 | 2 | 810.870602 |
| KTD8.2_1 | 90.861505 | 0.041092 | 209.4633 | 209.4633 | [209.46329999999998, 355.660845, 580.268264999... | [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] | [209.46330000000057, 569.8131332224608, 771.72... | 0.04646 | 0.986486 | KTD8.2 | 1 | 300.324805 |
| CTL05A_2 | 21.167943 | 0.043012 | 78.093855 | 186.873482 | [78.09385499999999, 193.287147, 241.072407, 38... | [9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0, 96.0... | [196.87989118736408, 225.13412776210168, 253.2... | 0.075352 | 0.999672 | CTL05A | 2 | 208.041424 |
| CTL02A_2 | 363.236713 | 0.043378 | 481.002864 | 291.49878 | [481.00286399999993, 682.5998189999999, 1748.9... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.0] | [624.5182863894054, 957.028292531812, 1900.212... | 0.030689 | 0.992677 | CTL02A | 2 | 654.735493 |
| CTL09A_5 | 245.43844 | 0.044788 | 380.20518 | 522.094091 | [380.20518, 567.604212, 948.6578159999999, 174... | [0.0, 8.0, 24.0, 32.0, 47.0, 56.0, 71.0] | [522.0940905069889, 627.8547624096685, 995.732... | 0.029391 | 0.98726 | CTL09A | 5 | 767.532531 |
| H9_2 | 250.804206 | 0.045415 | 114.8643 | 58.4637 | [114.86429999999999, 704.2667159999999, 1200.7... | [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] | [303.32447520829385, 553.5933001556893, 1281.8... | 0.142488 | 0.999084 | H9 | 2 | 309.267906 |
| CTL06F_2 | 68.093768 | 0.048632 | 84.18644 | 229.702436 | [84.18644, 202.243508, 373.50316399999997, 681... | [0.0, 9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0,... | [229.7024356845842, 267.094655263772, 380.3884... | 0.120596 | 0.998732 | CTL06F | 2 | 297.796204 |
| UCSFi001-A_5 | 264.778941 | 0.055867 | 675.93918 | 505.45398 | [675.93918, 889.3241639999999, 1171.628004, 18... | [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] | [505.4539800000207, 887.9480350639889, 1252.69... | 0.036406 | 0.981117 | UCSFi001-A | 5 | 770.232921 |
# Average fitted growth rate per cell line, shown from slowest to fastest.
mean_rate_by_line = fitted_df.groupby('Line')['rate'].mean()
pd.DataFrame(mean_rate_by_line.sort_values())
| rate | |
|---|---|
| Line | |
| H1 | 0.01805 |
| CTL08A | 0.022974 |
| CTL04E | 0.02582 |
| CTL01A | 0.026182 |
| H9 | 0.027264 |
| CTL07C | 0.029036 |
| CTL02A | 0.029985 |
| CTL05A | 0.030928 |
| UCSFi001-A | 0.031243 |
| KTD8.2 | 0.031428 |
| CTL06F | 0.033399 |
| CTL09A | 0.03352 |
# Expand every row of `fitted_df` (indexed by a '<line>_<split>' identifier)
# into a long-format frame with one row per (mean_cum, split_time) observation,
# then stack all of them into `data_tot`.
data_tot = []
for line_n in fitted_df.index:
    sub = fitted_df.loc[line_n]
    n_points = len(sub['mean_cum'])
    data = pd.DataFrame(
        {
            'mean_cum': sub['mean_cum'],
            'split_time': sub['split_time'],
            # Repeat the identifier so each observation keeps track of its origin.
            'line_n': [line_n] * n_points,
        }
    )
    data_tot.append(data)
data_tot = pd.concat(data_tot)

# Split the identifier on '_' : first part is the cell line, second the split.
id_parts = [identifier.split('_') for identifier in data_tot['line_n']]
data_tot['Line'] = [parts[0] for parts in id_parts]
data_tot['split'] = [parts[1] for parts in id_parts]
# Fit `exp_model` once per cell line, pooling the observations of all its
# splits, and draw one panel (fitted curve + data points) per fitted line.
# Results are collected in `fitted_param_line`, keyed by line name.
fig, ax = plt.subplots(4, 3, figsize=(20, 20))
ax = ax.flatten().T
ax_index = 0
fitted_param_line = {}
for line in order:
    # Every line gets an entry; it stays empty when the line is skipped
    # (too few time points) or when the fit does not converge.
    fitted_param_line[line] = {}
    sub = data_tot[data_tot.Line == line]
    sub = sub.sort_values('split_time')
    # Only fit lines observed at more than 5 distinct split times.
    if len(sub.split_time.unique()) > 5:
        # Running total of `mean_cum` over the time-sorted observations.
        y = sub['mean_cum'].cumsum().values
        x = np.array(sub.split_time)
        try:
            # Only the optimiser call is guarded, so that unrelated errors
            # are not reported as fitting failures.
            popt, pcov = curve_fit(exp_model, x, y, p0=(max(y), 0.1, 0))
        except RuntimeError as e:
            # curve_fit raises RuntimeError when it cannot find optimal
            # parameters. Report the line currently being fitted (`line`),
            # not the stale `line_n` left over from an earlier loop.
            print(f"Fitting failed for line {line}: {e}")
            continue
        a, b, c = popt
        # Model prediction at the observed time points.
        y_fitted = exp_model(x, a, b, c)
        # 'intercept' and 'rate' hold the first and second fitted parameters.
        fitted_param_line[line]['intercept'] = popt[0]
        fitted_param_line[line]['rate'] = popt[1]
        fitted_param_line[line]['mean_cum'] = y
        fitted_param_line[line]['split_time'] = x
        fitted_param_line[line]['y_fitted'] = y_fitted
        fitted_param_line[line]['r2'] = r2_score(y, y_fitted)
        sns.lineplot(y=y_fitted, x=x, ax=ax[ax_index], markers=True)
        sns.scatterplot(y=y, x=x, ax=ax[ax_index], markers=True)
        ax[ax_index].xaxis.set_tick_params(rotation=90)
        ax[ax_index].set_title(f'{line}', fontsize=30)
        ax[ax_index].set_ylabel('Increase in area ', fontsize=20)
        ax[ax_index].set_xlabel('Time point', fontsize=20)
        _ = ax[ax_index].set_xticklabels(ax[ax_index].get_xticklabels(), fontsize=15)
        _ = ax[ax_index].set_yticklabels(ax[ax_index].get_yticklabels(), fontsize=15)
        # Advance to the next panel only after a successful fit.
        ax_index += 1
plt.tight_layout()
Fitting failed for line_n UCSFi001-A_5: Optimal parameters not found: Number of calls to function has reached maxfev = 800.
# Turn the per-line fit results into a table with one row per cell line,
# keep only the rows that have an 'intercept' value, and show them ordered
# by fitted rate.
fitted_df_line = pd.DataFrame.from_dict(fitted_param_line).T
missing_fit = fitted_df_line['intercept'].isna()
fitted_df_line = fitted_df_line[~missing_fit]
fitted_df_line.sort_values(by='rate')
| intercept | rate | mean_cum | split_time | y_fitted | r2 | |
|---|---|---|---|---|---|---|
| CTL04E | 77718.739098 | 0.009307 | [463.8341, 1094.28342, 1902.4216549999999, 271... | [0.0, 0.0, 8.0, 15.0, 16.0, 24.0, 24.0, 24.0, ... | [-13318.63230998673, -13318.63230998673, -7311... | 0.977045 |
| KTD8.2 | 31449.366199 | 0.010281 | [209.46329999999998, 748.7734333333333, 1020.4... | [0.0, 0.0, 0.0, 9.0, 15.0, 16.0, 24.0, 24.0, 2... | [-5888.99421008131, -5888.99421008131, -5888.9... | 0.98984 |
| H9 | 42196.039006 | 0.013802 | [143.72009999999997, 451.810965, 796.519785, 1... | [0.0, 0.0, 0.0, 5.0, 15.0, 15.0, 16.0, 17.0, 2... | [-14202.131016338491, -14202.131016338491, -14... | 0.986461 |
| UCSFi001-A | 39047.258426 | 0.013856 | [313.51438, 563.56072, 1239.4998999999998, 175... | [0.0, 0.0, 0.0, 8.0, 15.0, 16.0, 24.0, 24.0, 2... | [-10942.223859024449, -10942.223859024449, -10... | 0.975132 |
| CTL07C | 23591.80031 | 0.015562 | [273.1388, 455.39954, 1368.77876, 1673.523608,... | [0.0, 0.0, 0.0, 5.0, 9.0, 15.0, 16.0, 20.0, 24... | [-3751.0660428077354, -3751.0660428077354, -37... | 0.996057 |
| CTL05A | 12604.421219 | 0.017384 | [544.2033449999999, 622.2971999999999, 1262.68... | [0.0, 9.0, 15.0, 23.0, 24.0, 33.0, 39.0, 39.0,... | [-12862.94760573278, -10728.29639654801, -9107... | 0.95798 |
| CTL02A | 26543.595486 | 0.018447 | [627.73808, 876.66455, 1868.6919939999998, 234... | [0.0, 0.0, 5.0, 15.0, 16.0, 20.0, 24.0, 24.0, ... | [-13963.093537789186, -13963.093537789186, -11... | 0.990371 |
| CTL09A | 19625.227048 | 0.019058 | [380.20518, 947.8093919999999, 1273.6660319999... | [0.0, 8.0, 9.0, 15.0, 24.0, 24.0, 24.0, 32.0, ... | [-11069.602807230889, -7837.397369140426, -739... | 0.990286 |
| H1 | 5121.873312 | 0.019555 | [940.9516199999999, 989.5223759999999, 2098.82... | [0.0, 9.0, 15.0, 24.0, 33.0, 39.0, 47.0, 48.0,... | [-4069.5514137167056, -3083.9292773786065, -23... | 0.987598 |
| CTL08A | 14967.053698 | 0.019842 | [127.16147999999998, 400.55144, 463.41458, 604... | [0.0, 0.0, 0.0, 0.0, 5.0, 15.0, 16.0, 17.0, 20... | [-8154.354064179996, -8154.354064179996, -8154... | 0.984026 |
| CTL06F | 14207.486024 | 0.019961 | [71.0148, 449.70911, 533.89555, 851.713414, 13... | [0.0, 0.0, 0.0, 0.0, 5.0, 9.0, 16.0, 20.0, 24.... | [-6175.114095213579, -6175.114095213579, -6175... | 0.993179 |
We use the logarithm of the area and then sum over all the FOVs for a specific time point in each line at each split. We then smooth it with gaussian_filter1d and compute the diff, corresponding to the first discrete derivative. Finally, we use the cumsum() function to obtain the cumulative sum of the discrete derivative of the growth.
# Draw one panel per (cell line, split) showing the cumulative sum of the
# smoothed discrete growth derivative against the time since the split.
fig, ax = plt.subplots(10, 5, figsize=(50, 63), gridspec_kw={'hspace': 0.7})
ax = ax.flatten()
ax_index = 0
for l in total_df['line'].unique():
    sub = total_df[total_df['line'] == l].sort_values(by='datetime')
    # Drop rows whose split label is the literal string 'day'.
    sub = sub[sub['n_split'] != 'day']
    for split in sub['n_split'].unique():
        subsub = sub[sub['n_split'] == split]
        # `preprocess` is a project helper not defined in this chunk; with
        # final_output='mean' the result is read below through its 'mean' and
        # 'split_time' columns (presumably one row per time point; confirm).
        summary_df = preprocess(
            subsub, original_v='Area (microm2)', final_output='mean'
        ).sort_values('split_time')
        # Gaussian smoothing (sigma = 3) of the per-time-point mean area.
        summary_df['smoothed'] = gaussian_filter1d(summary_df['mean'], sigma=3)
        # Finite-difference derivative: change in smoothed area per unit of split_time.
        delta_area = summary_df['smoothed'].diff()
        delta_time = summary_df['split_time'].diff()
        summary_df['derivative'] = delta_area / delta_time
        summary_df['cumulative'] = summary_df['derivative'].cumsum()
        farray = np.array(summary_df['cumulative'])
        y_pos = summary_df['split_time'].values
        panel = ax[ax_index]
        panel.errorbar(y_pos, farray, marker='o')
        panel.xaxis.set_tick_params(rotation=90)
        panel.set_title(f'{l}_split_{split}')
        panel.set_ylabel('Cumulative growth')
        panel.set_xlabel('Time point')
        ax_index += 1
plt.show()
# Show the column labels currently present in `total_df`.
total_df.columns
Index(['total_area', 'perc_area', 'mean_area_per_colony', 'n_colonies',
'time_point', 'confluency/generation', 'hour', 'month', 'day', 'line',
'datetime', 'norm_factor', 'perc_area_norm', 'err_bar_mean', 'n_split',
'split_time', 'pixel_size', 'Area (microm2)', 'Area (mm2)',
'line_split', 'mean_area_tp', 'area_error', 'std', 'cv', 'line_n_split',
'logArea'],
dtype='object')
# Overlay the cumulative growth curve of every (cell line, split) on a single
# axis, coloured by cell line, and keep each per-split summary table in
# `cumulative_dict_dfs` (keyed '<line>_<split>').
sns.set(style="white", palette="Paired", color_codes=True)
fig, ax = plt.subplots(figsize=(7, 5))
labels, lc, handles = [], [], []
all_lines = {}
cumulative_dict_dfs = {}
# Copy of the data without the rows whose split label is '1'
# (not used further in this cell).
total_df_no_first = total_df[total_df['n_split'] != '1'].copy()
for l in total_df['line'].unique():
    color = line_palette[l]
    sub = total_df[total_df['line'] == l].sort_values(by='datetime')
    # Drop rows whose split label is the literal string 'day'.
    sub = sub[sub['n_split'] != 'day']
    for split in sub['n_split'].unique():
        subsub = sub[sub['n_split'] == split]
        if len(subsub['split_time'].unique()) == 0:
            continue
        curve_key = f'{l}_{split}'
        # `preprocess` is a project helper not defined in this chunk; its
        # result is read below through the 'mean' and 'split_time' columns.
        summary_df = preprocess(
            subsub, original_v='Area (microm2)', final_output='mean'
        ).sort_values('split_time')
        # Smooth, differentiate with respect to split_time, then accumulate.
        summary_df['smoothed'] = gaussian_filter1d(summary_df['mean'], sigma=3)
        delta_area = summary_df['smoothed'].diff()
        delta_time = summary_df['split_time'].diff()
        summary_df['derivative'] = delta_area / delta_time
        summary_df['cumulative'] = summary_df['derivative'].cumsum()
        cumulative_dict_dfs[curve_key] = summary_df
        farray = np.array(summary_df['cumulative'])
        y_pos = summary_df['split_time'].values
        (line,) = ax.plot(y_pos, farray, color=color, marker='.')
        ax.xaxis.set_tick_params(rotation=90)
        # Keep the artist so it can be referenced after plotting.
        all_lines[curve_key] = line
        handles.append(line)
        labels.append(l)
        lc.append(color)
plt.legend(handles, labels, bbox_to_anchor=(1, 1))
<matplotlib.legend.Legend at 0x7f22b99145e0>
# Pass the plotted curves to `highlight_growth_curves` (a project helper not
# defined in this chunk) together with the axis labels, then save the current
# figure as SVG.
lines = total_df['line'].unique()
highlight_growth_curves(
    all_lines,
    lines=lines,
    xlabel='Hours from split',
    ylabel='Cumulative growth',
    fontsize=20,
)
plt.savefig('./figures/cumulative_growth_curve_per_line.svg', dpi=300)
Here I fit a linear regression model to the cumulative sums of all the splits for each line. We take the slope of the fit as the growth rate of the line.
# Stack the per-split summary tables into a single frame; after reset_index
# the '<line>_<split>' key of each table is available as column 'level_0'.
cumulative_df = pd.concat(
    cumulative_dict_dfs.values(),
    keys=cumulative_dict_dfs.keys(),
).reset_index()
# The cell line is the part of the key before the first underscore.
cumulative_df['line'] = [key.split('_')[0] for key in cumulative_df['level_0']]
cumulative_df
| level_0 | level_1 | time_point | mean | stds | hour | month | day | datetime | split_time | smoothed | derivative | cumulative | line | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | H1_1 | 0 | 06_11_23_t18 | 48570.756 | 30156.872900 | 18 | 11 | 06 | 2023-11-06 18:00:00 | 0.0 | 110143.004205 | NaN | NaN | H1 |
| 1 | H1_1 | 2 | 07_11_23_t9 | 60913.200 | 94781.769346 | 9 | 11 | 07 | 2023-11-07 09:00:00 | 15.0 | 137570.951804 | 1828.529840 | 1828.529840 | H1 |
| 2 | H1_1 | 1 | 07_11_23_t18 | 19238.925 | 12090.104228 | 18 | 11 | 07 | 2023-11-07 18:00:00 | 24.0 | 195181.480282 | 6401.169831 | 8229.699671 | H1 |
| 3 | H1_1 | 4 | 08_11_23_t9 | 72999.585 | 102548.695788 | 9 | 11 | 08 | 2023-11-08 09:00:00 | 39.0 | 287232.524109 | 6136.736255 | 14366.435926 | H1 |
| 4 | H1_1 | 3 | 08_11_23_t18 | 55094.292 | 61100.627807 | 18 | 11 | 08 | 2023-11-08 18:00:00 | 48.0 | 417456.373402 | 14469.316588 | 28835.752514 | H1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 459 | CTL01A_5 | 3 | 20_11_23_t9 | 188056.464 | 63817.297072 | 9 | 11 | 20 | 2023-11-20 09:00:00 | 23.0 | 425051.456538 | 3051.706842 | 6177.820375 | CTL01A |
| 460 | CTL01A_5 | 2 | 20_11_23_t18 | 633256.125 | 453126.246308 | 18 | 11 | 20 | 2023-11-20 18:00:00 | 32.0 | 483465.005972 | 6490.394382 | 12668.214756 | CTL01A |
| 461 | CTL01A_5 | 5 | 21_11_23_t9 | 568493.484 | 354817.532310 | 9 | 11 | 21 | 2023-11-21 09:00:00 | 47.0 | 543394.794597 | 3995.319242 | 16663.533998 | CTL01A |
| 462 | CTL01A_5 | 4 | 21_11_23_t17 | 386422.080 | NaN | 17 | 11 | 21 | 2023-11-21 17:00:00 | 55.0 | 592580.870876 | 6148.259535 | 22811.793533 | CTL01A |
| 463 | CTL01A_5 | 6 | 22_11_23_t10 | 1151778.360 | NaN | 10 | 11 | 22 | 2023-11-22 10:00:00 | 72.0 | 620324.506463 | 1631.978564 | 24443.772097 | CTL01A |
464 rows × 14 columns
# Build a grid with one panel per cell line (4 panels per row) and draw a
# first-order regression of cumulative growth against split_time in each.
# NOTE(review): no `hue` variable is given, so `palette` presumably has no
# visible effect here. Confirm whether per-line colouring was intended.
grid = sns.FacetGrid(
    data=cumulative_df.sort_values(by='split_time'),
    col='line',
    col_wrap=4,
    height=5,
    palette=line_palette,
)
grid.map(sns.regplot, 'split_time', 'cumulative', order=1)
# Tighten the layout, leaving a little horizontal padding between panels.
grid.fig.tight_layout(w_pad=1)
# Fit one linear regression per cell line (cumulative growth vs. split_time,
# pooling all splits) and tabulate the five values returned by linregress.
result_fields = ('slope', 'intercept', 'rvalue', 'pvalue', 'stderr')
fitted_model = {}
for l in cumulative_df['line'].unique():
    sub = cumulative_df[cumulative_df['line'] == l].sort_values(by='datetime')
    key = f'{l}'
    # Missing cumulative values are replaced by 0 before fitting.
    regression = stats.linregress(sub['split_time'], sub['cumulative'].fillna(0))
    fitted_model[key] = dict(zip(result_fields, regression))

# One row per line; the dictionary key ends up in the 'index' column.
fitted_model_df = pd.DataFrame.from_dict(fitted_model).T.reset_index()
fitted_model_df['line'] = [name.split('_')[0] for name in fitted_model_df['index']]
fitted_model_df.sort_values(by='slope')
| index | slope | intercept | rvalue | pvalue | stderr | line | |
|---|---|---|---|---|---|---|---|
| 11 | CTL01A | 644.317934 | -2403.923983 | 0.658753 | 3.427561e-04 | 153.439932 | CTL01A |
| 0 | H1 | 1092.216426 | -17870.728308 | 0.929554 | 3.246271e-13 | 83.369973 | H1 |
| 1 | CTL04E | 1957.784502 | -21455.175368 | 0.849852 | 4.029196e-12 | 196.950623 | CTL04E |
| 3 | CTL05A | 2002.574201 | -24668.872554 | 0.858594 | 5.454580e-12 | 199.285868 | CTL05A |
| 5 | KTD8.2 | 2123.269235 | -46580.571519 | 0.961890 | 3.878081e-24 | 95.435362 | KTD8.2 |
| 10 | UCSFi001-A | 2410.553184 | -34882.929624 | 0.873562 | 1.020601e-14 | 207.238467 | UCSFi001-A |
| 4 | H9 | 3810.734488 | -62725.125730 | 0.940884 | 5.551798e-19 | 225.539637 | H9 |
| 9 | CTL07C | 4069.466071 | -98092.360468 | 0.872476 | 2.224665e-13 | 369.740303 | CTL07C |
| 8 | CTL08A | 4263.987240 | -102963.382905 | 0.888082 | 9.120149e-16 | 340.563762 | CTL08A |
| 7 | CTL06F | 4575.687405 | -109378.476161 | 0.905129 | 3.365469e-17 | 331.626382 | CTL06F |
| 6 | CTL09A | 5028.233033 | -88363.366680 | 0.895983 | 1.307039e-14 | 409.717545 | CTL09A |
| 2 | CTL02A | 5231.682227 | -91740.048754 | 0.858920 | 1.340509e-12 | 506.011614 | CTL02A |
# Mean of the 'rvalue' column across the per-line linear fits.
fitted_model_df.rvalue.mean()
0.8744732639082722
# Write the per-line linear-fit table to CSV, ordered by increasing slope.
(
    fitted_model_df
    .sort_values(by='slope')
    .to_csv('../../data/csv/iPSC_fitted_lm_grouped.csv')
)